Index: projects/release-pkg/Makefile.inc1
===================================================================
--- projects/release-pkg/Makefile.inc1	(revision 293335)
+++ projects/release-pkg/Makefile.inc1	(revision 293336)
@@ -1,2541 +1,2541 @@
 #
 # $FreeBSD$
 #
 # Make command line options:
 #	-DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir
 #	-DNO_CLEAN do not clean at all
 #	-DDB_FROM_SRC use the user/group databases in src/etc instead of
 #	    the system database when installing.
 #	-DNO_SHARE do not go into share subdir
 #	-DKERNFAST define NO_KERNEL{CONFIG,CLEAN,DEPEND,OBJ}
 #	-DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel
 #	-DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel
 #	-DNO_KERNELDEPEND do not run ${MAKE} depend in ${MAKE} buildkernel
 #	-DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel
 #	-DNO_PORTSUPDATE do not update ports in ${MAKE} update
 #	-DNO_ROOT install without using root privilege
 #	-DNO_DOCUPDATE do not update doc in ${MAKE} update
 #	-DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects
 #	LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list
 #	LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list
 #	LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target
 #	LOCAL_MTREE="list of mtree files" to process to allow local directories
 #	    to be created before files are installed
 #	LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools
 #	    list
 #	METALOG="path to metadata log" to write permission and ownership
 #	    when NO_ROOT is set.  (default: ${DESTDIR}/METALOG)
 #	TARGET="machine" to crossbuild world for a different machine type
 #	TARGET_ARCH= may be required when a TARGET supports multiple endians
 #	BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL})
 #	WORLD_FLAGS= additional flags to pass to make(1) during buildworld
 #	KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel
 #	SUBDIR_OVERRIDE="list of dirs" to build rather than everything.
 #	    All libraries and includes, and some build tools will still build.
 
 #
 # The intended user-driven targets are:
 # buildworld  - rebuild *everything*, including glue to help do upgrades
 # installworld- install everything built by "buildworld"
 # doxygen     - build API documentation of the kernel
 # update      - convenient way to update your source tree (eg: svn/svnup)
 #
 # Standard targets (not defined here) are documented in the makefiles in
 # /usr/share/mk.  These include:
 #		obj depend all install clean cleandepend cleanobj
 
 .if !defined(TARGET) || !defined(TARGET_ARCH)
 .error "Both TARGET and TARGET_ARCH must be defined."
 .endif
 
 LOCALBASE?=	/usr/local
 
 # Cross toolchain changes must be in effect before bsd.compiler.mk
 # so that it gets the right CC, and so CROSS_TOOLCHAIN is passed to submakes.
 .if defined(CROSS_TOOLCHAIN)
 .include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk"
 CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}"
 .endif
 .include <bsd.compiler.mk>		# don't depend on src.opts.mk doing it
 .include "share/mk/src.opts.mk"	
 
 # We must do lib/ and libexec/ before bin/ in case of a mid-install error to
 # keep the user's system reasonably usable.  For static->dynamic root upgrades,
 # we don't want to install a dynamic binary without rtld and the needed
 # libraries.  More commonly, for dynamic root, we don't want to install a
 # binary that requires a newer library version that hasn't been installed yet.
 # This ordering is not a guarantee though.  The only guarantee of a working
 # system here would require fine-grained ordering of all components based
 # on their dependencies.
 SRCDIR?=	${.CURDIR}
 .if !empty(SUBDIR_OVERRIDE)
 SUBDIR=	${SUBDIR_OVERRIDE}
 .else
 SUBDIR=	lib libexec
 .if make(install*)
 # Ensure libraries are installed before progressing.
 SUBDIR+=.WAIT
 .endif
 SUBDIR+=bin
 .if ${MK_CDDL} != "no"
 SUBDIR+=cddl
 .endif
 SUBDIR+=gnu include
 .if ${MK_KERBEROS} != "no"
 SUBDIR+=kerberos5
 .endif
 .if ${MK_RESCUE} != "no"
 SUBDIR+=rescue
 .endif
 SUBDIR+=sbin
 .if ${MK_CRYPT} != "no"
 SUBDIR+=secure
 .endif
 .if !defined(NO_SHARE)
 SUBDIR+=share
 .endif
 SUBDIR+=sys usr.bin usr.sbin
 .if ${MK_TESTS} != "no"
 SUBDIR+=	tests
 .endif
 .if ${MK_OFED} != "no"
 SUBDIR+=contrib/ofed
 .endif
 
 # Local directories are last, since it is nice to at least get the base
 # system rebuilt before you do them.
 .for _DIR in ${LOCAL_DIRS}
 .if exists(${.CURDIR}/${_DIR}/Makefile)
 SUBDIR+=	${_DIR}
 .endif
 .endfor
 # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR
 # of a LOCAL_DIRS directory.  This allows LOCAL_DIRS=foo and
 # LOCAL_LIB_DIRS=foo/lib to behave as expected.
 .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|}
 _REDUNDENT_LIB_DIRS+=    ${LOCAL_LIB_DIRS:M${_DIR}*}
 .endfor
 .for _DIR in ${LOCAL_LIB_DIRS}
 .if empty(_REDUNDENT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile)
 SUBDIR+=	${_DIR}
 .else
 .warning ${_DIR} not added to SUBDIR list.  See UPDATING 20141121.
 .endif
 .endfor
 
 # We must do etc/ last as it hooks into building the man whatis file
 # by calling 'makedb' in share/man.  This is only relevant for
 # install/distribute so they build the whatis file after every manpage is
 # installed.
 .if make(install*)
 SUBDIR+=.WAIT
 .endif
 SUBDIR+=etc
 
 .endif	# !empty(SUBDIR_OVERRIDE)
 
 # Honor the deprecated NOCLEAN spelling by mapping it onto NO_CLEAN (with a
 # warning so users migrate).
 .if defined(NOCLEAN)
 .warning NOCLEAN option is deprecated. Use NO_CLEAN instead.
 NO_CLEAN=	${NOCLEAN}
 .endif
 # CLEANDIR is the list of targets used when cleaning the object tree:
 # -DNO_CLEANDIR selects "clean cleandepend" in place of "cleandir".
 .if defined(NO_CLEANDIR)
 CLEANDIR=	clean cleandepend
 .else
 CLEANDIR=	cleandir
 .endif
 
 LOCAL_TOOL_DIRS?=
 PACKAGEDIR?=	${DESTDIR}/${DISTDIR}
 
 .if empty(SHELL:M*csh*)
 BUILDENV_SHELL?=${SHELL}
 .else
 BUILDENV_SHELL?=/bin/sh
 .endif
 
 SVN?=		/usr/local/bin/svn
 SVNFLAGS?=	-r HEAD
 
 MAKEOBJDIRPREFIX?=	/usr/obj
 .if !defined(OSRELDATE)
 .if exists(/usr/include/osreldate.h)
 OSRELDATE!=	awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \
 		/usr/include/osreldate.h
 .else
 OSRELDATE=	0
 .endif
 .export OSRELDATE
 .endif
 
 # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION.
 .if !defined(VERSION) && !make(showconfig)
 REVISION!=	${MAKE} -C ${SRCDIR}/release -V REVISION
 BRANCH!=	${MAKE} -C ${SRCDIR}/release -V BRANCH
 SRCRELDATE!=	awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \
 		${SRCDIR}/sys/sys/param.h
 VERSION=	FreeBSD ${REVISION}-${BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE}
 .export VERSION
 .endif
 
 .if !defined(PKG_VERSION)
 REVISION!=	${MAKE} -C ${SRCDIR}/release -V REVISION
 BRANCH!=	${MAKE} -C ${SRCDIR}/release -V BRANCH
 SRCRELDATE!=	awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \
 		${SRCDIR}/sys/sys/param.h
 .if ${BRANCH:MSTABLE*} || ${BRANCH:MCURRENT*}
 TIMENOW=	%Y%m%d%H%M%S
 EXTRA_REVISION=	.s${TIMENOW:gmtime}
 .endif
 .if ${BRANCH:M*-p*}
 EXTRA_REVISION=	_${BRANCH:C/.*-p([0-9]+$)/\1/}
 .endif
 PKG_VERSION=	${REVISION}${EXTRA_REVISION}
 .endif
 
 KNOWN_ARCHES?=	aarch64/arm64 \
 		amd64 \
 		arm \
 		armeb/arm \
 		armv6/arm \
 		armv6hf/arm \
 		i386 \
 		i386/pc98 \
 		mips \
 		mipsel/mips \
 		mips64el/mips \
 		mips64/mips \
 		mipsn32el/mips \
 		mipsn32/mips \
 		powerpc \
 		powerpc64/powerpc \
 		riscv64/riscv \
 		sparc64
 
 # Build the KNOWN_ARCHES key for the requested target: just TARGET when it
 # equals TARGET_ARCH, otherwise TARGET_ARCH/TARGET.
 .if ${TARGET} == ${TARGET_ARCH}
 _t=		${TARGET}
 .else
 _t=		${TARGET_ARCH}/${TARGET}
 .endif
 # Abort on any combination that is not listed in KNOWN_ARCHES.
 # NOTE(review): the single-iteration .for presumably exists so that ${_t} is
 # substituted as a loop variable inside the :M pattern -- confirm.
 .for _t in ${_t}
 .if empty(KNOWN_ARCHES:M${_t})
 .error Unknown target ${TARGET_ARCH}:${TARGET}.
 .endif
 .endfor
 
 .if ${TARGET} == ${MACHINE}
 TARGET_CPUTYPE?=${CPUTYPE}
 .else
 TARGET_CPUTYPE?=
 .endif
 
 # Sanity-check that a CPUTYPE supplied through the environment survives:
 # a sub-make is run with CPUTYPE set to the target CPU type (or to the
 # placeholder "dummy" when none was given) and asked to print CPUTYPE back.
 # If the value printed differs, the build stops with the error below.
 .if !empty(TARGET_CPUTYPE)
 _TARGET_CPUTYPE=${TARGET_CPUTYPE}
 .else
 _TARGET_CPUTYPE=dummy
 .endif
 # Skip for showconfig as it is just wasted time and may invoke auto.obj.mk.
 .if !make(showconfig)
 _CPUTYPE!=	MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} \
 		-f /dev/null -m ${.CURDIR}/share/mk -V CPUTYPE
 .if ${_CPUTYPE} != ${_TARGET_CPUTYPE}
 .error CPUTYPE global should be set with ?=.
 .endif
 .endif
 # For buildworld only: compare MACHINE_ARCH with the processor type reported
 # by "uname -p" and stop if they differ; cross-builds must be requested
 # through TARGET_ARCH instead.
 .if make(buildworld)
 BUILD_ARCH!=	uname -p
 .if ${MACHINE_ARCH} != ${BUILD_ARCH}
 .error To cross-build, set TARGET_ARCH.
 .endif
 .endif
 .if ${MACHINE} == ${TARGET} && ${MACHINE_ARCH} == ${TARGET_ARCH} && !defined(CROSS_BUILD_TESTING)
 OBJTREE=	${MAKEOBJDIRPREFIX}
 .else
 OBJTREE=	${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH}
 .endif
 WORLDTMP=	${OBJTREE}${.CURDIR}/tmp
 BPATH=		${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin
 XPATH=		${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin
 STRICTTMPPATH=	${BPATH}:${XPATH}
 TMPPATH=	${STRICTTMPPATH}:${PATH}
 
 #
 # Avoid running mktemp(1) unless actually needed.
 # It may not be functional, e.g., due to new ABI
 # when in the middle of installing over this system.
 #
 .if make(distributeworld) || make(installworld) || make(stageworld)
 INSTALLTMP!=	/usr/bin/mktemp -d -u -t install
 .endif
 
 #
 # Building a world goes through the following stages
 #
 # 1.1. legacy stage [BMAKE]
 #	This stage is responsible for creating compatibility
 #	shims that are needed by the bootstrap-tools,
 #	build-tools and cross-tools stages. These are generally
 #	APIs that tools from one of those three stages need to
 #	build that aren't present on the host.
 # 1.2. bootstrap-tools stage [BMAKE]
 #	This stage is responsible for creating programs that
 #	are needed for backward compatibility reasons. They
 #	are not built as cross-tools.
 # 2. build-tools stage [TMAKE]
 #	This stage is responsible for creating the object
 #	tree and building any tools that are needed during
 #	the build process. Some programs are listed during
 #	this phase because they build binaries to generate
 #	files needed to build these programs. This stage also
 #	builds the 'build-tools' target rather than 'all'.
 # 3. cross-tools stage [XMAKE]
 #	This stage is responsible for creating any tools that
 #	are needed for building the system. A cross-compiler is one
 #	of them. This differs from build tools in two ways:
 #	1. the 'all' target is built rather than 'build-tools'
 #	2. these tools are installed into TMPPATH for stage 4.
 # 4. world stage [WMAKE]
 #	This stage actually builds the world.
 # 5. install stage (optional) [IMAKE]
 #	This stage installs a previously built world.
 #
 
 BOOTSTRAPPING?=	0
 
 # Common environment for world related stages
 CROSSENV+=	MAKEOBJDIRPREFIX=${OBJTREE} \
 		MACHINE_ARCH=${TARGET_ARCH} \
 		MACHINE=${TARGET} \
 		CPUTYPE=${TARGET_CPUTYPE}
 .if ${MK_GROFF} != "no"
 CROSSENV+=	GROFF_BIN_PATH=${WORLDTMP}/legacy/usr/bin \
 		GROFF_FONT_PATH=${WORLDTMP}/legacy/usr/share/groff_font \
 		GROFF_TMAC_PATH=${WORLDTMP}/legacy/usr/share/tmac
 .endif
 .if defined(TARGET_CFLAGS)
 CROSSENV+=	${TARGET_CFLAGS}
 .endif
 
 # bootstrap-tools stage
 BMAKEENV=	INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${BPATH}:${PATH} \
 		WORLDTMP=${WORLDTMP} \
 		MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}"
 # need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile
 BSARGS= 	DESTDIR= \
 		BOOTSTRAPPING=${OSRELDATE} \
 		SSP_CFLAGS= \
 		MK_HTML=no NO_LINT=yes MK_MAN=no \
 		-DNO_PIC MK_PROFILE=no -DNO_SHARED \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 		MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
 		MK_LLDB=no MK_TESTS=no \
 		MK_INCLUDES=yes
 
 BMAKE=		MAKEOBJDIRPREFIX=${WORLDTMP} \
 		${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
 		${BSARGS}
 
 # build-tools stage
 TMAKE=		MAKEOBJDIRPREFIX=${OBJTREE} \
 		${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
 		TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
 		DESTDIR= \
 		BOOTSTRAPPING=${OSRELDATE} \
 		SSP_CFLAGS= \
 		-DNO_LINT \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 		MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
 		MK_LLDB=no MK_TESTS=no
 
 # cross-tools stage
 XMAKE=		TOOLS_PREFIX=${WORLDTMP} ${BMAKE} \
 		TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
 		MK_GDB=no MK_TESTS=no
 
 # kernel-tools stage
 KTMAKEENV=	INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${BPATH}:${PATH} \
 		WORLDTMP=${WORLDTMP}
 KTMAKE=		TOOLS_PREFIX=${WORLDTMP} MAKEOBJDIRPREFIX=${WORLDTMP} \
 		${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
 		DESTDIR= \
 		BOOTSTRAPPING=${OSRELDATE} \
 		SSP_CFLAGS= \
 		MK_HTML=no -DNO_LINT MK_MAN=no \
 		-DNO_PIC MK_PROFILE=no -DNO_SHARED \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no
 
 # world stage
 WMAKEENV=	${CROSSENV} \
 		_LDSCRIPTROOT= \
 		INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${TMPPATH}
 
 # make hierarchy
 HMAKE=		PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q}
 .if defined(NO_ROOT)
 HMAKE+=		PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT
 .endif
 
 .if defined(CROSS_TOOLCHAIN_PREFIX)
 CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
 CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
 .endif
 
 # If we do not have a bootstrap binutils (because the in-tree one does not
 # support the target architecture), provide a default cross-binutils prefix.
 # This allows aarch64 builds, for example, to automatically use the
 # aarch64-binutils port or package.
 .if !make(showconfig)
 .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \
     !defined(CROSS_BINUTILS_PREFIX)
 CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_ARCH}-freebsd/bin/
 .if !exists(${CROSS_BINUTILS_PREFIX})
 .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX.
 .endif
 .endif
 .endif
 
 XCOMPILERS=	CC CXX CPP
 .for COMPILER in ${XCOMPILERS}
 .if defined(CROSS_COMPILER_PREFIX)
 X${COMPILER}?=	${CROSS_COMPILER_PREFIX}${${COMPILER}}
 .else
 X${COMPILER}?=	${${COMPILER}}
 .endif
 .endfor
 XBINUTILS=	AS AR LD NM OBJCOPY OBJDUMP RANLIB SIZE STRINGS
 .for BINUTIL in ${XBINUTILS}
 .if defined(CROSS_BINUTILS_PREFIX) && \
     exists(${CROSS_BINUTILS_PREFIX}${${BINUTIL}})
 X${BINUTIL}?=	${CROSS_BINUTILS_PREFIX}${${BINUTIL}}
 .else
 X${BINUTIL}?=	${${BINUTIL}}
 .endif
 .endfor
 CROSSENV+=	CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCFLAGS} ${XCXXFLAGS}" \
 		DEPFLAGS="${DEPFLAGS}" \
 		CPP="${XCPP} ${XCFLAGS}" \
 		AS="${XAS}" AR="${XAR}" LD="${XLD}" NM=${XNM} \
 		OBJDUMP=${XOBJDUMP} OBJCOPY="${XOBJCOPY}" \
 		RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \
 		SIZE="${XSIZE}"
 
 .if ${XCC:N${CCACHE_BIN}:M/*}
 .if defined(CROSS_BINUTILS_PREFIX)
 # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a
 # directory, but the compiler will look in the right place for its
 # tools so we don't need to tell it where to look.
 .if exists(${CROSS_BINUTILS_PREFIX})
 BFLAGS+=	-B${CROSS_BINUTILS_PREFIX}
 .endif
 .else
 BFLAGS+=	-B${WORLDTMP}/usr/bin
 .endif
 .if ${TARGET} == "arm"
 .if ${TARGET_ARCH:M*hf*} != ""
 TARGET_ABI=	gnueabihf
 .else
 TARGET_ABI=	gnueabi
 .endif
 .endif
 .if defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc
 XCFLAGS+=	-isystem ${WORLDTMP}/usr/include -L${WORLDTMP}/usr/lib
 XCXXFLAGS+=	-I${WORLDTMP}/usr/include/c++/v1 -std=gnu++11 -L${WORLDTMP}/../lib/libc++
 # XXX: DEPFLAGS is a workaround for not properly passing CXXFLAGS to sub-makes
 # due to CXX="${XCXX} ${XCXXFLAGS}".  bsd.dep.mk does use CXXFLAGS when
 # building C++ files so this can come out if passing CXXFLAGS down is fixed.
 DEPFLAGS+=	-I${WORLDTMP}/usr/include/c++/v1
 .else
 TARGET_ABI?=	unknown
 TARGET_TRIPLE?=	${TARGET_ARCH:C/amd64/x86_64/}-${TARGET_ABI}-freebsd11.0
 XCFLAGS+=	-target ${TARGET_TRIPLE}
 .endif
 XCFLAGS+=	--sysroot=${WORLDTMP} ${BFLAGS}
 XCXXFLAGS+=	--sysroot=${WORLDTMP} ${BFLAGS}
 .else
 .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX})
 BFLAGS+=	-B${CROSS_BINUTILS_PREFIX}
 XCFLAGS+=	${BFLAGS}
 XCXXFLAGS+=	${BFLAGS}
 .endif
 .endif # ${XCC:M/*}
 
 WMAKE=		${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 DESTDIR=${WORLDTMP}
 
 .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "powerpc64"
 # 32 bit world
 LIB32_OBJTREE=	${OBJTREE}${.CURDIR}/world32
 LIB32TMP=	${OBJTREE}${.CURDIR}/lib32
 
 .if ${TARGET_ARCH} == "amd64"
 .if empty(TARGET_CPUTYPE)
 LIB32CPUFLAGS=	-march=i686 -mmmx -msse -msse2
 .else
 LIB32CPUFLAGS=	-march=${TARGET_CPUTYPE}
 .endif
 LIB32WMAKEENV=	MACHINE=i386 MACHINE_ARCH=i386 \
 		MACHINE_CPU="i686 mmx sse sse2"
 LIB32WMAKEFLAGS=	\
 		AS="${XAS} --32" \
 		LD="${XLD} -m elf_i386_fbsd -Y P,${LIB32TMP}/usr/lib32" \
 		OBJCOPY="${XOBJCOPY}"
 
 .elif ${TARGET_ARCH} == "powerpc64"
 .if empty(TARGET_CPUTYPE)
 LIB32CPUFLAGS=	-mcpu=powerpc
 .else
 LIB32CPUFLAGS=	-mcpu=${TARGET_CPUTYPE}
 .endif
 LIB32WMAKEENV=	MACHINE=powerpc MACHINE_ARCH=powerpc
 LIB32WMAKEFLAGS=	\
 		LD="${XLD} -m elf32ppc_fbsd" \
 		OBJCOPY="${XOBJCOPY}"
 .endif
 
 
 LIB32FLAGS=	-m32 ${LIB32CPUFLAGS} -DCOMPAT_32BIT \
 		-isystem ${LIB32TMP}/usr/include/ \
 		-L${LIB32TMP}/usr/lib32 \
 		-B${LIB32TMP}/usr/lib32
 .if ${XCC:N${CCACHE_BIN}:M/*}
 LIB32FLAGS+=		--sysroot=${WORLDTMP}
 .endif
 
 # Yes, the flags are redundant.
 LIB32WMAKEENV+=	MAKEOBJDIRPREFIX=${LIB32_OBJTREE} \
 		_LDSCRIPTROOT=${LIB32TMP} \
 		INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${TMPPATH} \
 		LIBDIR=/usr/lib32 \
 		SHLIBDIR=/usr/lib32 \
 		DTRACE="${DTRACE} -32"
 LIB32WMAKEFLAGS+= CC="${XCC} ${LIB32FLAGS}" \
 		CXX="${XCXX} ${LIB32FLAGS}" \
 		DESTDIR=${LIB32TMP} \
 		-DCOMPAT_32BIT \
 		-DLIBRARIES_ONLY \
 		-DNO_CPU_CFLAGS \
 		MK_CTF=no \
 		-DNO_LINT \
 		MK_TESTS=no
 
 LIB32WMAKE=	${LIB32WMAKEENV} ${MAKE} ${LIB32WMAKEFLAGS} \
 		MK_MAN=no MK_HTML=no
 LIB32IMAKE=	${LIB32WMAKE:NINSTALL=*:NDESTDIR=*:N_LDSCRIPTROOT=*} \
 		MK_TOOLCHAIN=no ${IMAKE_INSTALL}
 .endif
 
 IMAKEENV=	${CROSSENV:N_LDSCRIPTROOT=*}
 IMAKE=		${IMAKEENV} ${MAKE} -f Makefile.inc1 \
 		${IMAKE_INSTALL} ${IMAKE_MTREE}
 .if empty(.MAKEFLAGS:M-n)
 IMAKEENV+=	PATH=${STRICTTMPPATH}:${INSTALLTMP} \
 		LD_LIBRARY_PATH=${INSTALLTMP} \
 		PATH_LOCALE=${INSTALLTMP}/locale
 IMAKE+=		__MAKE_SHELL=${INSTALLTMP}/sh
 .else
 IMAKEENV+=	PATH=${TMPPATH}:${INSTALLTMP}
 .endif
 .if defined(DB_FROM_SRC)
 INSTALLFLAGS+=	-N ${.CURDIR}/etc
 MTREEFLAGS+=	-N ${.CURDIR}/etc
 .endif
 _INSTALL_DDIR=	${DESTDIR}/${DISTDIR}
 INSTALL_DDIR=	${_INSTALL_DDIR:S://:/:g:C:/$::}
 .if defined(NO_ROOT)
 METALOG?=	${DESTDIR}/${DISTDIR}/METALOG
 IMAKE+=		-DNO_ROOT METALOG=${METALOG}
 INSTALLFLAGS+=	-U -M ${METALOG} -D ${INSTALL_DDIR}
 MTREEFLAGS+=	-W
 .endif
 .if defined(BUILD_PKGS)
 INSTALLFLAGS+=	-h sha256
 .endif
 .if defined(DB_FROM_SRC) || defined(NO_ROOT)
 IMAKE_INSTALL=	INSTALL="install ${INSTALLFLAGS}"
 IMAKE_MTREE=	MTREE_CMD="mtree ${MTREEFLAGS}"
 .endif
 
 # kernel stage
 KMAKEENV=	${WMAKEENV}
 KMAKE=		${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME}
 
 #
 # buildworld
 #
 # Attempt to rebuild the entire system, with reasonable chance of
 # success, regardless of how old your existing system is.
 #
 _worldtmp: .PHONY
 .if ${.CURDIR:C/[^,]//g} != ""
 #	The m4 build of sendmail files doesn't like it if ',' is used
 #	anywhere in the path of its files.
 	@echo
 	@echo "*** Error: path to source tree contains a comma ','"
 	@echo
 	false
 .endif
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Rebuilding the temporary build tree"
 	@echo "--------------------------------------------------------------"
 .if !defined(NO_CLEAN)
 	rm -rf ${WORLDTMP}
 .if defined(LIB32TMP)
 	rm -rf ${LIB32TMP}
 .endif
 .else
 	rm -rf ${WORLDTMP}/legacy/usr/include
 #	XXX - These three can depend on any header file.
 	rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/ioctl.c
 	rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/kdump_subr.c
 	rm -f ${OBJTREE}${.CURDIR}/usr.bin/truss/ioctl.c
 .endif
 .for _dir in \
     lib usr legacy/bin legacy/usr
 	mkdir -p ${WORLDTMP}/${_dir}
 .endfor
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${WORLDTMP}/legacy/usr >/dev/null
 .if ${MK_GROFF} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.groff.dist \
 	    -p ${WORLDTMP}/legacy/usr >/dev/null
 .endif
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${WORLDTMP}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${WORLDTMP}/usr/include >/dev/null
 	ln -sf ${.CURDIR}/sys ${WORLDTMP}
 .if ${MK_DEBUG_FILES} != "no"
 	# We could instead disable debug files for these build stages
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${WORLDTMP}/legacy/usr/lib >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${WORLDTMP}/usr/lib >/dev/null
 .endif
 .if ${MK_LIB32} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${WORLDTMP}/usr >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${WORLDTMP}/legacy/usr/lib/debug/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${WORLDTMP}/usr/lib/debug/usr >/dev/null
 .endif
 .endif
 .if ${MK_TESTS} != "no"
 	mkdir -p ${WORLDTMP}${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${WORLDTMP}${TESTSBASE} >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mkdir -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} >/dev/null
 .endif
 .endif
 .for _mtree in ${LOCAL_MTREE}
 	mtree -deU -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null
 .endfor
 _legacy:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 1.1: legacy release compatibility shims"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${BMAKE} legacy
 _bootstrap-tools:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 1.2: bootstrap tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools
 _cleanobj:
 .if !defined(NO_CLEAN)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.1: cleaning up the object tree"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} ${CLEANDIR}
 .if defined(LIB32TMP)
 	${_+_}cd ${.CURDIR}; ${LIB32WMAKE} -f Makefile.inc1 ${CLEANDIR}
 .endif
 .endif
 _obj:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.2: rebuilding the object tree"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} obj
 _build-tools:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.3: build tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${TMAKE} build-tools
 _cross-tools:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 3: cross tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools
 	${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools
 _includes:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.1: building includes"
 	@echo "--------------------------------------------------------------"
 # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need
 # headers from default SUBDIR.  Do SUBDIR_OVERRIDE includes last.
 	${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \
 	    includes
 .if !empty(SUBDIR_OVERRIDE) && make(buildworld)
 	${_+_}cd ${.CURDIR}; ${WMAKE} SHARED=symlinks includes
 .endif
 _libraries:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.2: building libraries"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; \
 	    ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \
 	    MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries
 _depend:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.3: make dependencies"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${WMAKE} depend
 everything:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 4.4: building everything"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all
 .if defined(LIB32TMP)
 # build32 (stage 5.1): populate ${LIB32TMP} with the directory hierarchy,
 # then use LIB32WMAKE to build the 32-bit compat libraries, the 32-bit
 # runtime linker (ld-elf32.so.1) and ldd32.
 #
 # Every DIRPRFX passed below ends in '/': the name of the directory being
 # entered is appended directly to it in the "===>" progress lines, so a
 # missing slash glues the two path components together.
 build32: .PHONY
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 5.1: building 32 bit shim libraries"
 	@echo "--------------------------------------------------------------"
 	mkdir -p ${LIB32TMP}/usr/include
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${LIB32TMP}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${LIB32TMP}/usr/include >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${LIB32TMP}/usr >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${LIB32TMP}/usr/lib >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${LIB32TMP}/usr/lib/debug/usr >/dev/null
 .endif
 	mkdir -p ${WORLDTMP}
 	ln -sf ${.CURDIR}/sys ${WORLDTMP}
 # Run "obj" and then "includes" in each library tree.
 .for _t in obj includes
 	${_+_}cd ${.CURDIR}/include; ${LIB32WMAKE} DIRPRFX=include/ ${_t}
 	${_+_}cd ${.CURDIR}/lib; ${LIB32WMAKE} DIRPRFX=lib/ ${_t}
 .if ${MK_CDDL} != "no"
 	${_+_}cd ${.CURDIR}/cddl/lib; ${LIB32WMAKE} DIRPRFX=cddl/lib/ ${_t}
 .endif
 	${_+_}cd ${.CURDIR}/gnu/lib; ${LIB32WMAKE} DIRPRFX=gnu/lib/ ${_t}
 .if ${MK_CRYPT} != "no"
 	${_+_}cd ${.CURDIR}/secure/lib; ${LIB32WMAKE} DIRPRFX=secure/lib/ ${_t}
 .endif
 .if ${MK_KERBEROS} != "no"
 	${_+_}cd ${.CURDIR}/kerberos5/lib; ${LIB32WMAKE} DIRPRFX=kerberos5/lib/ ${_t}
 .endif
 .endfor
 .for _dir in usr.bin/lex/lib
 	${_+_}cd ${.CURDIR}/${_dir}; ${LIB32WMAKE} DIRPRFX=${_dir}/ obj
 .endfor
 # These directories provide a build-tools target; it is run with a plain
 # ${MAKE} and explicit settings rather than through LIB32WMAKE.
 .for _dir in lib/ncurses/ncurses lib/ncurses/ncursesw lib/libmagic
 	${_+_}cd ${.CURDIR}/${_dir}; \
 	    WORLDTMP=${WORLDTMP} \
 	    MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" \
 	    MAKEOBJDIRPREFIX=${LIB32_OBJTREE} ${MAKE} SSP_CFLAGS= DESTDIR= \
 	    DIRPRFX=${_dir}/ -DNO_LINT -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 	    build-tools
 .endfor
 	${_+_}cd ${.CURDIR}; \
 	    ${LIB32WMAKE} -f Makefile.inc1 -DNO_FSCHG libraries
 # Build the 32-bit runtime linker and ldd under their "32" program names.
 .for _t in obj depend all
 	${_+_}cd ${.CURDIR}/libexec/rtld-elf; PROG=ld-elf32.so.1 ${LIB32WMAKE} \
 	    -DNO_FSCHG DIRPRFX=libexec/rtld-elf/ ${_t}
 	${_+_}cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIB32WMAKE} \
 	    DIRPRFX=usr.bin/ldd/ ${_t}
 .endfor
 
 # distribute32 / install32: run the same-named target with the trailing "32"
 # stripped (i.e. "distribute" or "install") through LIB32IMAKE in each 32-bit
 # library directory, then in libexec/rtld-elf (as ld-elf32.so.1) and
 # usr.bin/ldd (as ldd32).  Optional trees follow MK_CDDL, MK_CRYPT and
 # MK_KERBEROS.
 distribute32 install32: .MAKE .PHONY
 	${_+_}cd ${.CURDIR}/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .if ${MK_CDDL} != "no"
 	${_+_}cd ${.CURDIR}/cddl/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .endif
 	${_+_}cd ${.CURDIR}/gnu/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .if ${MK_CRYPT} != "no"
 	${_+_}cd ${.CURDIR}/secure/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .endif
 .if ${MK_KERBEROS} != "no"
 	${_+_}cd ${.CURDIR}/kerberos5/lib; ${LIB32IMAKE} ${.TARGET:S/32$//}
 .endif
 	${_+_}cd ${.CURDIR}/libexec/rtld-elf; \
 	    PROG=ld-elf32.so.1 ${LIB32IMAKE} ${.TARGET:S/32$//}
 	${_+_}cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIB32IMAKE} \
 	    ${.TARGET:S/32$//}
 .endif
 
 WMAKE_TGTS=
 WMAKE_TGTS+=	_worldtmp _legacy
 .if empty(SUBDIR_OVERRIDE)
 WMAKE_TGTS+=	_bootstrap-tools
 .endif
 WMAKE_TGTS+=	_cleanobj _obj _build-tools _cross-tools
 WMAKE_TGTS+=	_includes _libraries _depend everything
 .if defined(LIB32TMP) && ${MK_LIB32} != "no" && empty(SUBDIR_OVERRIDE)
 WMAKE_TGTS+=	build32
 .endif
 
 # buildworld depends on the stage targets collected in WMAKE_TGTS, bracketed
 # by the prologue and epilogue banner targets.  The .ORDER line lists the same
 # targets so that they are made in exactly this sequence.
 buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue
 .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue
 
 buildworld_prologue:
 	@echo "--------------------------------------------------------------"
 	@echo ">>> World build started on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 
 buildworld_epilogue:
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> World build completed on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 
 #
 # We need to have this as a target because the indirection between Makefile
 # and Makefile.inc1 causes the correct PATH to be used, rather than a
 # modification of the current environment's PATH.  In addition, we need
 # to quote multiword values.
 #
 buildenvvars: .PHONY
 	@echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@}
 
 # buildenv: start ${BUILDENV_SHELL} in ${BUILDENV_DIR} (default: the source
 # directory) with the world-stage environment (WMAKEENV) plus BUILDENV=1.
 # Requesting it together with -j is rejected up front.
 .if ${.TARGETS:Mbuildenv}
 .if ${.MAKEFLAGS:M-j}
 .error The buildenv target is incompatible with -j
 .endif
 .endif
 BUILDENV_DIR?=	${.CURDIR}
 buildenv: .PHONY
 	@echo Entering world for ${TARGET_ARCH}:${TARGET}
 # zsh users are told to export CPUTYPE themselves.
 .if ${BUILDENV_SHELL:M*zsh*}
 	@echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE}
 .endif
 # "|| true" keeps a failed cd or a non-zero shell exit status from failing
 # the target.
 	@cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} \
 	    || true
 
 # toolchain runs the buildworld stage list minus _depend, everything and
 # build32; kernel-toolchain additionally leaves out _includes and _libraries.
 TOOLCHAIN_TGTS=	${WMAKE_TGTS:N_depend:Neverything:Nbuild32}
 toolchain: ${TOOLCHAIN_TGTS}
 kernel-toolchain: ${TOOLCHAIN_TGTS:N_includes:N_libraries}
 
 #
 # installcheck
 #
 # Checks to be sure system is ready for installworld/installkernel.
 #
 installcheck: _installcheck_world _installcheck_kernel
 _installcheck_world:
 _installcheck_kernel:
 
 #
 # Require DESTDIR to be set if installing for a different architecture or
 # using the user/group database in the source tree.
 #
 .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \
     defined(DB_FROM_SRC)
 .if !make(distributeworld)
 _installcheck_world: __installcheck_DESTDIR
 _installcheck_kernel: __installcheck_DESTDIR
 __installcheck_DESTDIR:
 .if !defined(DESTDIR) || empty(DESTDIR)
 	@echo "ERROR: Please set DESTDIR!"; \
 	false
 .endif
 .endif
 .endif
 
 .if !defined(DB_FROM_SRC)
 #
 # Check for missing UIDs/GIDs.
 #
 CHECK_UIDS=	auditdistd
 CHECK_GIDS=	audit
 .if ${MK_SENDMAIL} != "no"
 CHECK_UIDS+=	smmsp
 CHECK_GIDS+=	smmsp
 .endif
 .if ${MK_PF} != "no"
 CHECK_UIDS+=	proxy
 CHECK_GIDS+=	proxy authpf
 .endif
 .if ${MK_UNBOUND} != "no"
 CHECK_UIDS+=	unbound
 CHECK_GIDS+=	unbound
 .endif
 # Hook the user/group check into the world install check.  For every name in
 # CHECK_UIDS / CHECK_GIDS a lookup command is run with its output discarded;
 # when the lookup fails an error is printed and the recipe line exits
 # non-zero.
 _installcheck_world: __installcheck_UGID
 __installcheck_UGID:
 .for uid in ${CHECK_UIDS}
 	@id -u ${uid} >/dev/null 2>&1 || { \
 		echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \
 		false; \
 	}
 .endfor
 .for gid in ${CHECK_GIDS}
 	@find / -prune -group ${gid} >/dev/null 2>&1 || { \
 		echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \
 		false; \
 	}
 .endfor
 .endif
 
 #
 # Required install tools to be saved in a scratch dir for safety.
 #
 .if ${MK_ZONEINFO} != "no"
 _zoneinfo=	zic tzsetup
 .endif
 
 ITOOLS=	[ awk cap_mkdb cat chflags chmod chown cmp cp \
 	date echo egrep find grep id install ${_install-info} \
 	ln make mkdir mtree mv pwd_mkdb \
 	rm sed services_mkdb sh strip sysctl test true uname wc ${_zoneinfo} \
 	${LOCAL_ITOOLS}
 
 # Needed for share/man
 .if ${MK_MAN} != "no"
 ITOOLS+=makewhatis
 .endif
 
 #
 # distributeworld
 #
 # Distributes everything compiled by a `buildworld'.
 #
 # installworld
 #
 # Installs everything compiled by a 'buildworld'.
 #
 
 # Non-base distributions produced by the base system
 EXTRA_DISTRIBUTIONS=	doc
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 EXTRA_DISTRIBUTIONS+=	lib32
 .endif
 .if ${MK_TESTS} != "no"
 EXTRA_DISTRIBUTIONS+=	tests
 .endif
 
 DEBUG_DISTRIBUTIONS=
 .if ${MK_DEBUG_FILES} != "no"
 DEBUG_DISTRIBUTIONS+=	base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,}
 .endif
 
 MTREE_MAGIC?=	mtree 2.0
 
 # distributeworld, installworld and stageworld share one recipe.  The
 # ITOOLS programs and their shared libraries are first copied into
 # ${INSTALLTMP}; then the matching re* target (redistribute, reinstall or
 # restage, derived from the target name) is run through ${IMAKE} and
 # ${INSTALLTMP} is removed again.  The distributeworld-only sections
 # prepare and post-process the per-distribution trees.
 distributeworld installworld stageworld: _installcheck_world
 	mkdir -p ${INSTALLTMP}
 # Resolve every tool with which(1), collect the libraries ldd(1) lists
 # for them, and copy both sets into the scratch directory.
 	progs=$$(for prog in ${ITOOLS}; do \
 		if progpath=`which $$prog`; then \
 			echo $$progpath; \
 		else \
 			echo "Required tool $$prog not found in PATH." >&2; \
 			exit 1; \
 		fi; \
 	    done); \
 	libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \
 	    while read line; do \
 		set -- $$line; \
 		if [ "$$2 $$3" != "not found" ]; then \
 			echo $$2; \
 		else \
 			echo "Required library $$1 not found." >&2; \
 			exit 1; \
 		fi; \
 	    done); \
 	cp $$libs $$progs ${INSTALLTMP}
 	cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale
 # With NO_ROOT, start a fresh METALOG containing only the signature line.
 .if defined(NO_ROOT)
 	echo "#${MTREE_MAGIC}" > ${METALOG}
 .endif
 # distributeworld only: create the directory skeleton of each extra
 # distribution with mtree before anything is installed into it.
 .if make(distributeworld)
 .for dist in ${EXTRA_DISTRIBUTIONS}
 	-mkdir ${DESTDIR}/${DISTDIR}/${dist}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null
 .endif
 .if ${MK_LIB32} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null
 .endif
 .endif
 .if ${MK_TESTS} != "no" && ${dist} == "tests"
 	-mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null
 .endif
 .endif
 # With NO_ROOT, also record the skeleton in the METALOG, with each path
 # re-rooted under the distribution's directory.
 .if defined(NO_ROOT)
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \
 	    sed -e 's#^\./#./${dist}/#' >> ${METALOG}
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \
 	    sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG}
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \
 	    sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG}
 .if ${MK_LIB32} != "no"
 	${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.lib32.dist | \
 	    sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG}
 .endif
 .endif
 .endfor
 	-mkdir ${DESTDIR}/${DISTDIR}/base
 	${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \
 	    METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \
 	    DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \
 	    LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs
 .endif
 # Strip the "world" suffix from the target name and run the re* target.
 	${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \
 	    ${IMAKEENV} rm -rf ${INSTALLTMP}
 .if make(distributeworld)
 # Remove empty files and directories left in the extra distributions.
 .for dist in ${EXTRA_DISTRIBUTIONS}
 	find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -empty -delete
 .endfor
 .if defined(NO_ROOT)
 .for dist in base ${EXTRA_DISTRIBUTIONS}
 	@# For each file that exists in this dist, print the corresponding
 	@# line from the METALOG.  This relies on the fact that
 	@# a line containing only the filename will sort immediately before
 	@# the relevant mtree line.
 	cd ${DESTDIR}/${DISTDIR}; \
 	find ./${dist} | sort -u ${METALOG} - | \
 	awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \
 	${DESTDIR}/${DISTDIR}/${dist}.meta
 .endfor
 .for dist in ${DEBUG_DISTRIBUTIONS}
 	@# For each file that exists in this dist, print the corresponding
 	@# line from the METALOG.  This relies on the fact that
 	@# a line containing only the filename will sort immediately before
 	@# the relevant mtree line.
 	cd ${DESTDIR}/${DISTDIR}; \
 	find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \
 	awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \
 	${DESTDIR}/${DISTDIR}/${dist}.debug.meta
 .endfor
 .endif
 .endif
 
 #
 # packageworld
 #
 # Archive every distribution tree under ${DESTDIR}/${DISTDIR} into
 # ${PACKAGEDIR}/<dist>.txz (without usr/lib/debug) and, for each entry of
 # DEBUG_DISTRIBUTIONS, the debug files into <dist>-dbg.txz.  With NO_ROOT
 # the archive contents are taken from the .meta files instead of the
 # on-disk ownership and permissions.
 #
 packageworld:
 .if defined(NO_ROOT)
 .for dist in base ${EXTRA_DISTRIBUTIONS}
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvf - --exclude usr/lib/debug \
 	    @${DESTDIR}/${DISTDIR}/${dist}.meta | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz
 .endfor
 .for dist in ${DEBUG_DISTRIBUTIONS}
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz
 .endfor
 .else
 .for dist in base ${EXTRA_DISTRIBUTIONS}
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvf - --exclude usr/lib/debug . | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz
 .endfor
 .for dist in ${DEBUG_DISTRIBUTIONS}
 	${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \
 	    tar cvLf - usr/lib/debug | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz
 .endfor
 .endif
 
 #
 # reinstall
 #
 # If you have a build server, you can NFS mount the source and obj directories
 # and do a 'make reinstall' on the *client* to install new binaries from the
 # most recent server build.
 #
 # Create the directory hierarchy, then run the install target of
 # Makefile.inc1 (plus install32 when the 32-bit libraries were built).
 reinstall: .MAKE .PHONY
 	@echo "--------------------------------------------------------------"; \
 	    echo ">>> Making hierarchy"; \
 	    echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \
 	    LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy
 	@echo; \
 	    echo "--------------------------------------------------------------"; \
 	    echo ">>> Installing everything"; \
 	    echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install32
 .endif
 
 # Same sequence as reinstall, except that the first sub-make also runs
 # the distribution target right after hierarchy.
 restage: .MAKE .PHONY
 	@echo "--------------------------------------------------------------"; \
 	    echo ">>> Making hierarchy"; \
 	    echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \
 	    LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy distribution
 	@echo; \
 	    echo "--------------------------------------------------------------"; \
 	    echo ">>> Installing everything"; \
 	    echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install32
 .endif
 
 # Run the distribute target of Makefile.inc1; when the 32-bit libraries
 # were built, distribute32 follows with DISTRIBUTION=lib32.
 redistribute: .MAKE .PHONY
 	@echo "--------------------------------------------------------------"; \
 	    echo ">>> Distributing everything"; \
 	    echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute
 .if defined(LIB32TMP) && ${MK_LIB32} != "no"
 	${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute32 \
 	    DISTRIBUTION=lib32
 .endif
 
 # Forward to the target of the same name in etc/, using the install
 # environment (${CROSSENV}, ${TMPPATH}) and passing METALOG through.
 distrib-dirs: .MAKE .PHONY
 	${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \
 	    ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET}
 
 # Forward to the distribution target in etc/, then run installconfig
 # from Makefile.inc1 with the same install environment and METALOG.
 distribution: .MAKE .PHONY
 	${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \
 	    ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET}
 	${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \
 		${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \
 		METALOG=${METALOG} installconfig
 
 #
 # buildkernel and installkernel
 #
 # Which kernels to build and/or install is specified by setting
 # KERNCONF.  If it is not defined, a GENERIC kernel is built/installed
 # (GENERIC64 when TARGET_ARCH is powerpc64).
 # Only config files that actually exist in KERNCONFDIR are used for
 # building kernels, and only the first of these is designated as the
 # one being installed.
 #
 # Note that we have to use TARGET instead of TARGET_ARCH when
 # we're in kernel-land. Since only TARGET_ARCH is (expected) to
 # be set to cross-build, we have to make sure TARGET is set
 # properly.
 
 # KERNFAST implies all four NO_KERNEL* shortcuts.
 .if defined(KERNFAST)
 NO_KERNELCLEAN=	t
 NO_KERNELCONFIG=	t
 NO_KERNELDEPEND=	t
 NO_KERNELOBJ=		t
 # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah
 .if !defined(KERNCONF) && ${KERNFAST} != "1"
 KERNCONF=${KERNFAST}
 .endif
 .endif
 .if ${TARGET_ARCH} == "powerpc64"
 KERNCONF?=	GENERIC64
 .else
 KERNCONF?=	GENERIC
 .endif
 INSTKERNNAME?=	kernel
 
 KERNSRCDIR?=	${.CURDIR}/sys
 KRNLCONFDIR=	${KERNSRCDIR}/${TARGET}/conf
 KRNLOBJDIR=	${OBJTREE}${KERNSRCDIR}
 KERNCONFDIR?=	${KRNLCONFDIR}
 
 # BUILDKERNELS collects every KERNCONF entry whose config file exists;
 # INSTALLKERNEL is the first of them.  With NO_INSTALLKERNEL a "dummy"
 # placeholder takes the first slot and INSTALLKERNEL stays empty, so
 # that ${BUILDKERNELS:[2..-1]} still names the kernels to process.
 BUILDKERNELS=
 INSTALLKERNEL=
 .if defined(NO_INSTALLKERNEL)
 # All of the BUILDKERNELS loops start at index 1.
 BUILDKERNELS+= dummy
 .endif
 .for _kernel in ${KERNCONF}
 .if exists(${KERNCONFDIR}/${_kernel})
 BUILDKERNELS+=	${_kernel}
 .if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL)
 INSTALLKERNEL= ${_kernel}
 .endif
 .endif
 .endfor
 
 # Mark the WMAKE_TGTS stages (except _worldtmp and build32) and every
 # "_"-prefixed target seen so far (except _worldtmp) as .MAKE .PHONY.
 ${WMAKE_TGTS:N_worldtmp:Nbuild32} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY
 
 #
 # buildkernel
 #
 # Builds all kernels defined by BUILDKERNELS (the "dummy" placeholder is
 # filtered out).  Fails up front when no kernel configuration file was
 # found.  Each stage can be skipped with the matching NO_KERNEL* knob;
 # the clean stage is also skipped by NO_CLEAN.
 #
 buildkernel: .MAKE .PHONY
 .if empty(BUILDKERNELS:Ndummy)
 	@echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \
 	false
 .endif
 	@echo
 .for _kernel in ${BUILDKERNELS:Ndummy}
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 	@echo "===> ${_kernel}"
 	mkdir -p ${KRNLOBJDIR}
 .if !defined(NO_KERNELCONFIG)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 1: configuring the kernel"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLCONFDIR}; \
 		PATH=${TMPPATH} \
 		    config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \
 			-I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}'
 .endif
 .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.1: cleaning up the object tree"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR}
 .endif
 .if !defined(NO_KERNELOBJ)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.2: rebuilding the object tree"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj
 .endif
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 2.3: build tools"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools
 .if !defined(NO_KERNELDEPEND)
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 3.1: making dependencies"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} depend -DNO_MODULES_OBJ
 .endif
 	@echo
 	@echo "--------------------------------------------------------------"
 	@echo ">>> stage 3.2: building everything"
 	@echo "--------------------------------------------------------------"
 	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`"
 	@echo "--------------------------------------------------------------"
 .endfor
 
 #
 # installkernel, etc.
 #
 # Install the kernel defined by INSTALLKERNEL as ${INSTKERNNAME}, then
 # every further entry of BUILDKERNELS as ${INSTKERNNAME}.<config>.
 # The sub-make target is the invoked target name with "kernel" removed
 # (install, install.debug, reinstall, reinstall.debug).
 # NO_INSTALLKERNEL skips the primary kernel; NO_INSTALLEXTRAKERNELS
 # skips the additional ones.
 #
 installkernel installkernel.debug \
 reinstallkernel reinstallkernel.debug: _installcheck_kernel
 .if !defined(NO_INSTALLKERNEL)
 .if empty(INSTALLKERNEL)
 	@echo "ERROR: No kernel \"${KERNCONF}\" to install."; \
 	false
 .endif
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Installing kernel ${INSTALLKERNEL}"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \
 	    ${CROSSENV} PATH=${TMPPATH} \
 	    ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//}
 .endif
-.if ${BUILDKERNELS:[#]} > 1
+.if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS)
 .for _kernel in ${BUILDKERNELS:[2..-1]}
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Installing kernel ${_kernel}"
 	@echo "--------------------------------------------------------------"
 	cd ${KRNLOBJDIR}/${_kernel}; \
 	    ${CROSSENV} PATH=${TMPPATH} \
 	    ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//}
 .endfor
 .endif
 
 # Install the primary kernel into ${INSTALL_DDIR}/kernel and each extra
 # kernel into ${INSTALL_DDIR}/kernel.<config>.  The sub-make target is
 # the invoked target name with "distributekernel" replaced by "install".
 # With NO_ROOT a kernel*.premeta file is started with the mtree
 # signature and afterwards filtered through sed into kernel*.meta.
 # The string METALOG inside IMAKE_INSTALL is replaced by the premeta
 # file name; presumably that redirects the metadata log (IMAKE_INSTALL
 # is defined elsewhere, confirm there).
 distributekernel distributekernel.debug:
 .if !defined(NO_INSTALLKERNEL)
 .if empty(INSTALLKERNEL)
 	@echo "ERROR: No kernel \"${KERNCONF}\" to install."; \
 	false
 .endif
 	mkdir -p ${DESTDIR}/${DISTDIR}
 .if defined(NO_ROOT)
 	echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta
 .endif
 	cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \
 	    ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \
 	    ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \
 	    DESTDIR=${INSTALL_DDIR}/kernel \
 	    ${.TARGET:S/distributekernel/install/}
 .if defined(NO_ROOT)
 	sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \
 	    ${DESTDIR}/${DISTDIR}/kernel.meta
 .endif
 .endif
-.if ${BUILDKERNELS:[#]} > 1
+.if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS)
 .for _kernel in ${BUILDKERNELS:[2..-1]}
 .if defined(NO_ROOT)
 	echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta
 .endif
 	cd ${KRNLOBJDIR}/${_kernel}; \
 	    ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \
 	    ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \
 	    KERNEL=${INSTKERNNAME}.${_kernel} \
 	    DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \
 	    ${.TARGET:S/distributekernel/install/}
 # NOTE(review): the expression below strips only a leading "./kernel"
 # (the "." matches any character).  If the premeta paths begin with
 # ./kernel.${_kernel}, the ".${_kernel}" part survives the rewrite;
 # confirm against the paths actually logged.
 .if defined(NO_ROOT)
 	sed -e 's|^./kernel|.|' \
 	    ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \
 	    ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta
 .endif
 .endfor
 .endif
 
 # Archive the distributed kernels.  For every kernel two archives are
 # written: <name>.txz without *.debug files into ${PACKAGEDIR}, and
 # <name>-dbg.txz with only the */*/*.debug files into
 # ${DESTDIR}/${DISTDIR}.  With NO_ROOT the member list comes from the
 # kernel*.meta files, otherwise from the directory contents.
 # Note that only the kernel.txz step is guarded by NO_INSTALLKERNEL;
 # the kernel-dbg.txz step runs regardless.
 packagekernel:
 .if defined(NO_ROOT)
 .if !defined(NO_INSTALLKERNEL)
 	cd ${DESTDIR}/${DISTDIR}/kernel; \
 	    tar cvf - --exclude '*.debug' \
 	    @${DESTDIR}/${DISTDIR}/kernel.meta | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz
 .endif
 	cd ${DESTDIR}/${DISTDIR}/kernel; \
 	    tar cvf - --include '*/*/*.debug' \
 	    @${DESTDIR}/${DISTDIR}/kernel.meta | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz
-.if ${BUILDKERNELS:[#]} > 1
+.if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS)
 .for _kernel in ${BUILDKERNELS:[2..-1]}
 	cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \
 	    tar cvf - --exclude '*.debug' \
 	    @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz
 	cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \
 	    tar cvf - --include '*/*/*.debug' \
 	    @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz
 .endfor
 .endif
 .else
 .if !defined(NO_INSTALLKERNEL)
 	cd ${DESTDIR}/${DISTDIR}/kernel; \
 	    tar cvf - --exclude '*.debug' . | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz
 .endif
 # The file list handed to tar here is the output of find(1) run in the
 # kernel directory.
 	cd ${DESTDIR}/${DISTDIR}/kernel; \
 	    tar cvf - --include '*/*/*.debug' $$(eval find .) | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz
-.if ${BUILDKERNELS:[#]} > 1
+.if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS)
 .for _kernel in ${BUILDKERNELS:[2..-1]}
 	cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \
 	    tar cvf - --exclude '*.debug' . | \
 	    ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz
 	cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \
 	    tar cvf - --include '*/*/*.debug' $$(eval find .) | \
 	    ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz
 .endfor
 .endif
 .endif
 
 #
 # create-world-packages
 #
 # Build one pkg(8) package per plist found in ${DESTDIR}:
 #   1. remove stale *.plist files and regenerate them from
 #      ${DESTDIR}/METALOG with mtree-to-plist.awk;
 #   2. require a release/packages/<name>.ucl manifest template for
 #      every plist, stopping at the first one that is missing;
 #   3. fill in the %...% placeholders of each template and run
 #      "pkg create" on the result.
 #
 create-world-packages:
 	@rm -f ${DESTDIR}/*.plist 2>/dev/null || :
 	@cd ${DESTDIR} ; \
 		awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \
 		${DESTDIR}/METALOG
 # Exit explicitly so that a missing template aborts the loop even when
 # the shell is not running with -e.
 	@for plist in ${DESTDIR}/*.plist; do \
 		plist=$${plist##*/} ; \
 		if [ ! -f ${SRCDIR}/release/packages/$${plist%.plist}.ucl ]; then \
 			echo "Unknown package FreeBSD-$${plist%.plist}" ; \
 			exit 1 ; \
 		fi ; \
 	done
 # The two *_MKDB_ENDIAN values are queried from etc/Makefile.
 	@cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \
 	pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \
 	for plist in ${DESTDIR}/*.plist; do \
 		plist=$${plist##*/} ; \
 		pkgname=$${plist%.plist} ; \
 		sed -e "s/%VERSION%/${PKG_VERSION}/" \
 			-e "s/%PKGNAME%/$${pkgname}/" \
 			-e "s/%COMMENT%/Generic comment for $${pkgname}/" \
 			-e "s/%DESC%/Generic description for $${pkgname}/" \
 			-e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \
 			-e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \
 			${SRCDIR}/release/packages/$${pkgname}.ucl \
 			> ${DESTDIR}/$${pkgname}.ucl ; \
 		awk -F\" ' \
 			/^name/ { printf("===> Creating %s-", $$2); next } \
 			/^version/ { print $$2; next } \
 			' ${DESTDIR}/$${pkgname}.ucl ; \
 		pkg -o ABI_FILE=${DESTDIR}/bin/sh \
 			create -M ${DESTDIR}/$${pkgname}.ucl \
 			-p ${DESTDIR}/$${pkgname}.plist \
 			-r ${DESTDIR} -o ${DESTDIR} ; \
 	done
 
 # Default staging root used by the packages target when DESTDIR is not
 # set.
 STAGEDIR=	${MAKEOBJDIRPREFIX}${.CURDIR}/stage
 
 #
 # packages
 #
 # Stage the world with -DNO_ROOT (so a METALOG is written), then turn
 # the staged tree into packages with create-world-packages.  Both
 # sub-makes use ${DESTDIR} when it is set and ${STAGEDIR} otherwise.
 #
 packages:
 	@mkdir -p ${STAGEDIR}
 	${_+_}@cd ${.CURDIR}; \
 		${MAKE} DESTDIR=${DESTDIR:U${STAGEDIR}} -DNO_ROOT -B stageworld ; \
 		${MAKE} DESTDIR=${DESTDIR:U${STAGEDIR}} create-world-packages
 
 #
 # create-kernel-packages
 #
 # Turn the kernel.meta / kernel.<config>.meta files written by
 # distributekernel into pkg(8) packages.  mtree-to-plist.awk is run on
 # each meta file; for every flavor (release, debug) whose plist exists,
 # a copy of release/packages/kernel is filled in as the manifest
 # directory and handed to "pkg create".
 #
 # The exists() tests are evaluated when the makefile is parsed, i.e.
 # before the awk step of this same run has executed.
 #
 # NOTE(review): the extra kernels reuse the same plist names, manifest
 # directory and %PKGNAME% (kernel-${flavor}) as the primary kernel, and
 # their comment/description carry the whole ${KERNCONF} list; confirm
 # that this is intended.
 #
 create-kernel-packages:
 	@cd ${DESTDIR}/${DISTDIR} ; \
 		awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \
 		${DESTDIR}/${DISTDIR}/kernel.meta
 .for flavor in release debug
 .if exists(${DESTDIR}/${DISTDIR}/${flavor}.plist)
 	@rm -rf ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir
 	@cp -r ${SRCDIR}/release/packages/kernel \
 		${DESTDIR}/${DISTDIR}/${flavor}-manifestdir
 	@cd ${DESTDIR}/${DISTDIR} ; \
 		sed -i '' -e "s/%VERSION%/${PKG_VERSION}/" \
 		-e "s/%PKGNAME%/kernel-${flavor}/" \
 		-e "s/%COMMENT%/FreeBSD ${KERNCONF} kernel ${flavor}/" \
 		-e "s/%DESC%/FreeBSD ${KERNCONF} kernel ${flavor}/" \
 		${DESTDIR}/${DISTDIR}/${flavor}-manifestdir/+MANIFEST
 	@awk -F\" '/name/ { printf("===> Creating %s-", $$2) } /version/ {print $$2 }' \
 		${DESTDIR}/${DISTDIR}/${flavor}-manifestdir/+MANIFEST
 	@pkg create -m ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir \
 		-p ${DESTDIR}/${DISTDIR}/${flavor}.plist \
 		-r ${DESTDIR}/${DISTDIR}/kernel \
 		-o ${DESTDIR}
 .endif
 .endfor
 # Extra kernels are selected exactly as in installkernel,
 # distributekernel and packagekernel: everything after the first
 # BUILDKERNELS entry, unless NO_INSTALLEXTRAKERNELS is defined.  Their
 # metadata lives in kernel.<config>.meta.
 .if ${BUILDKERNELS:[#]} > 1 && !defined(NO_INSTALLEXTRAKERNELS)
 .for _kernel in ${BUILDKERNELS:[2..-1]}
 	@cd ${DESTDIR}/${DISTDIR} ; \
 		awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \
 		${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta
 .for flavor in release debug
 .if exists(${DESTDIR}/${DISTDIR}/${flavor}.plist)
 	@rm -rf ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir
 	@cp -r ${SRCDIR}/release/packages/kernel \
 		${DESTDIR}/${DISTDIR}/${flavor}-manifestdir
 	@cd ${DESTDIR}/${DISTDIR} ; \
 		sed -i '' -e "s/%VERSION%/${PKG_VERSION}/" \
 		-e "s/%PKGNAME%/kernel-${flavor}/" \
 		-e "s/%COMMENT%/FreeBSD ${KERNCONF} kernel ${flavor}/" \
 		-e "s/%DESC%/FreeBSD ${KERNCONF} kernel ${flavor}/" \
 		${DESTDIR}/${DISTDIR}/${flavor}-manifestdir/+MANIFEST
 	@awk -F\" '/name/ { printf("===> Creating %s-", $$2) } /version/ {print $$2 }' \
 		${DESTDIR}/${DISTDIR}/${flavor}-manifestdir/+MANIFEST
 	@pkg create -m ${DESTDIR}/${DISTDIR}/${flavor}-manifestdir \
 		-p ${DESTDIR}/${DISTDIR}/${flavor}.plist \
 		-r ${DESTDIR}/${DISTDIR}/kernel.${_kernel} \
 		-o ${DESTDIR}
 .endif
 .endfor
 .endfor
 .endif
 
 #
 # doxygen
 #
 # Generate the kernel API documentation.  Requires an executable
 # ${LOCALBASE}/bin/doxygen; otherwise a hint is printed and the target
 # fails.
 #
 doxygen: .PHONY
 	@[ -x ${LOCALBASE}/bin/doxygen ] || { \
 		echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \
 		exit 1; \
 	}
 	${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all
 
 #
 # update
 #
 # Update the source tree by running "${SVN} update" when SVN_UPDATE is
 # defined.  Requesting the retired CVS_UPDATE/SUP_UPDATE methods without
 # SVN_UPDATE prints a notice and fails.  With none of the three defined
 # the target does nothing.
 #
 update:
 .if (defined(CVS_UPDATE) || defined(SUP_UPDATE)) && !defined(SVN_UPDATE)
 	@echo "--------------------------------------------------------------"
 	@echo "CVS_UPDATE and SUP_UPDATE are no longer supported."
 	@echo "Please see: https://wiki.freebsd.org/CvsIsDeprecated"
 	@echo "--------------------------------------------------------------"
 	@exit 1
 .endif
 .if defined(SVN_UPDATE)
 	@echo "--------------------------------------------------------------"
 	@echo ">>> Updating ${.CURDIR} using Subversion"
 	@echo "--------------------------------------------------------------"
 	@(cd ${.CURDIR}; ${SVN} update ${SVNFLAGS})
 .endif
 
 #
 # ------------------------------------------------------------------------
 #
 # From here onwards are utility targets used by the 'make world' and
 # related targets.  If your 'world' breaks, you may like to try to fix
 # the problem and manually run the following targets to attempt to
 # complete the build.  Beware, this is *not* guaranteed to work, you
 # need to have a pretty good grip on the current state of the system
 # to attempt to manually finish it.  If in doubt, 'make world' again.
 #
 
 #
 # legacy: Build compatibility shims for the next three targets. This is a
 # minimal set of tools and shims necessary to compensate for older systems
 # which don't have the APIs required by the targets built in bootstrap-tools,
 # build-tools or cross-tools.
 #
 
 # ELF Tool Chain libraries are needed for ELF tools and dtrace tools.
 .if ${BOOTSTRAPPING} < 1100006
 _elftoolchain_libs= lib/libelf lib/libdwarf
 .endif
 
 # Refuse to bootstrap from a host older than 8.0 (a BOOTSTRAPPING value
 # of 0 is exempt), then build tools/build and, when selected above, the
 # ELF Tool Chain libraries, installing headers and results under
 # ${MAKEOBJDIRPREFIX}/legacy.
 legacy:
 .if ${BOOTSTRAPPING} < 800107 && ${BOOTSTRAPPING} != 0
 	@echo "ERROR: Source upgrades from versions prior to 8.0 are not supported."; \
 	false
 .endif
 .for _tool in tools/build ${_elftoolchain_libs}
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,depend,all,install)"; \
 	    cd ${.CURDIR}/${_tool}; \
 	    ${MAKE} DIRPRFX=${_tool}/ obj; \
 	    ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy includes; \
 	    ${MAKE} DIRPRFX=${_tool}/ depend; \
 	    ${MAKE} DIRPRFX=${_tool}/ all; \
 	    ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install
 .endfor
 
 #
 # bootstrap-tools: Build tools needed for compatibility. These are binaries that
 # are built to build other binaries in the system. However, the focus of these
 # binaries is usually quite narrow. Bootstrap tools use the host's compiler and
 # libraries, augmented by -legacy.
 #
 # Each variable below names the source directories of one optional tool
 # and stays undefined when the tool is not needed.  The
 # "${_bt}-<dir>: ${_bt}-<dir>" lines make one per-tool target a
 # prerequisite of another.
 #
 _bt=		_bootstrap-tools
 
 .if ${MK_GAMES} != "no"
 _strfile=	usr.bin/fortune/strfile
 .endif
 
 .if ${MK_GCC} != "no" && ${MK_CXX} != "no"
 _gperf=		gnu/usr.bin/gperf
 .endif
 
 .if ${MK_GROFF} != "no"
 _groff=		gnu/usr.bin/groff \
 		usr.bin/soelim
 .endif
 
 .if ${MK_VT} != "no"
 _vtfontcvt=	usr.bin/vtfontcvt
 .endif
 
 .if ${BOOTSTRAPPING} < 900002
 _sed=		usr.bin/sed
 .endif
 
 .if ${BOOTSTRAPPING} < 1000002
 _libopenbsd=	lib/libopenbsd
 _m4=		usr.bin/m4
 
 ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd
 .endif
 
 .if ${BOOTSTRAPPING} < 1000026
 _nmtree=	lib/libnetbsd \
 		usr.sbin/nmtree
 
 ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd
 .endif
 
 .if ${BOOTSTRAPPING} < 1000027
 _cat=		bin/cat
 .endif
 
 .if ${BOOTSTRAPPING} < 1000033
 _lex=		usr.bin/lex
 
 ${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4
 .endif
 
 # r277259 crunchide: Correct 64-bit section header offset
 # r281674 crunchide: always include both 32- and 64-bit ELF support
 # r285986 crunchgen: use STRIPBIN rather than STRIP
 .if ${BOOTSTRAPPING} < 1100078
 _crunch=	usr.sbin/crunch
 .endif
 
 .if ${BOOTSTRAPPING} >= 900040 && ${BOOTSTRAPPING} < 900041
 _awk=		usr.bin/awk
 .endif
 
 # yacc and liby are always bootstrapped.
 _yacc=		lib/liby \
 		usr.bin/yacc
 
 ${_bt}-usr.bin/yacc: ${_bt}-lib/liby
 
 .if ${MK_BSNMP} != "no"
 _gensnmptree=	usr.sbin/bsnmpd/gensnmptree
 .endif
 
 # We need to build tblgen when we're building clang either as
 # the bootstrap compiler, or as the part of the normal build.
 .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no"
 _clang_tblgen= \
 	lib/clang/libllvmsupport \
 	lib/clang/libllvmtablegen \
 	usr.bin/clang/tblgen \
 	usr.bin/clang/clang-tblgen
 
 ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport
 ${_bt}-usr.bin/clang/tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport
 .endif
 
 # Default to building the GPL DTC, but build the BSDL one if users explicitly
 # request it.
 _dtc= usr.bin/dtc
 .if ${MK_GPL_DTC} != "no"
 _dtc= gnu/usr.bin/dtc
 .endif
 
 # The Kerberos bootstrap tools are built strictly in the listed order.
 .if ${MK_KERBEROS} != "no"
 _kerberos5_bootstrap_tools= \
 	kerberos5/tools/make-roken \
 	kerberos5/lib/libroken \
 	kerberos5/lib/libvers \
 	kerberos5/tools/asn1_compile \
 	kerberos5/tools/slc \
 	usr.bin/compile_et
 
 .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g}
 .endif
 
 # With MANDOCDB, usr.bin/mandoc (plus libsqlite3 and libopenbsd) is
 # bootstrapped in place of usr.bin/makewhatis.
 .if ${MK_MANDOCDB} != "no"
 _libopenbsd?=	lib/libopenbsd
 _makewhatis=	lib/libsqlite3 \
 		usr.bin/mandoc
 ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd ${_bt}-lib/libsqlite3
 .else
 _makewhatis=usr.bin/makewhatis
 .endif
 
 # bootstrap-tools itself has no recipe.  The loop below defines one
 # ${_bt}-<dir> target per tool, which runs obj, depend, all and install
 # (into ${MAKEOBJDIRPREFIX}/legacy) in that directory, and makes it a
 # prerequisite of bootstrap-tools.
 bootstrap-tools: .PHONY
 
 #	Please document (add comment) why something is in 'bootstrap-tools'.
 #	Try to bound the building of the bootstrap-tool to just the
 #	FreeBSD versions that need the tool built at this stage of the build.
 .for _tool in \
     ${_clang_tblgen} \
     ${_kerberos5_bootstrap_tools} \
     ${_strfile} \
     ${_gperf} \
     ${_groff} \
     ${_dtc} \
     ${_awk} \
     ${_cat} \
     usr.bin/lorder \
     ${_libopenbsd} \
     ${_makewhatis} \
     usr.bin/rpcgen \
     ${_sed} \
     ${_yacc} \
     ${_m4} \
     ${_lex} \
     usr.bin/xinstall \
     ${_gensnmptree} \
     usr.sbin/config \
     ${_crunch} \
     ${_nmtree} \
     ${_vtfontcvt} \
     usr.bin/localedef
 ${_bt}-${_tool}: .PHONY .MAKE
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_tool}; \
 		${MAKE} DIRPRFX=${_tool}/ obj; \
 		${MAKE} DIRPRFX=${_tool}/ depend; \
 		${MAKE} DIRPRFX=${_tool}/ all; \
 		${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install
 
 bootstrap-tools: ${_bt}-${_tool}
 .endfor
 
 #
 # build-tools: Build special purpose build tools
 #
 # One build-tools_<dir> target is defined per directory and attached to
 # build-tools as a prerequisite.  The first group runs "obj" and the
 # directory's own "build-tools" target; the second group (the gcc
 # tools) runs obj, depend and all instead.
 #
 .if !defined(NO_SHARE)
 _share=	share/syscons/scrnmaps
 .endif
 
 .if ${MK_GCC} != "no"
 _gcc_tools= gnu/usr.bin/cc/cc_tools
 .endif
 
 .if ${MK_RESCUE} != "no"
 # rescue includes programs that have build-tools targets
 _rescue=rescue/rescue
 .endif
 
 .for _tool in \
     bin/csh \
     bin/sh \
     ${LOCAL_TOOL_DIRS} \
     lib/ncurses/ncurses \
     lib/ncurses/ncursesw \
     ${_rescue} \
     ${_share} \
     usr.bin/awk \
     lib/libmagic \
     usr.bin/mkesdb_static \
     usr.bin/mkcsmapper_static \
     usr.bin/vi/catalog
 build-tools_${_tool}: .PHONY
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,build-tools)"; \
 		cd ${.CURDIR}/${_tool}; \
 		${MAKE} DIRPRFX=${_tool}/ obj; \
 		${MAKE} DIRPRFX=${_tool}/ build-tools
 build-tools: build-tools_${_tool}
 .endfor
 .for _tool in \
     ${_gcc_tools}
 build-tools_${_tool}: .PHONY
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all)"; \
 		cd ${.CURDIR}/${_tool}; \
 		${MAKE} DIRPRFX=${_tool}/ obj; \
 		${MAKE} DIRPRFX=${_tool}/ depend; \
 		${MAKE} DIRPRFX=${_tool}/ all
 build-tools: build-tools_${_tool}
 .endfor
 
 #
 # kernel-tools: Build kernel-building tools
 #
 # As written here the target only creates ${MAKEOBJDIRPREFIX}/usr and
 # populates it with the BSD.usr.dist directory layout.
 #
 kernel-tools:
 	mkdir -p ${MAKEOBJDIRPREFIX}/usr
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${MAKEOBJDIRPREFIX}/usr >/dev/null
 
 #
 # cross-tools: All the tools needed to build the rest of the system after
 # we get done with the earlier stages. It is the last set of tools needed
 # to begin building the target binaries.
 #
 # btxld is only built when cross-building for amd64 or i386.
 .if ${TARGET_ARCH} != ${MACHINE_ARCH}
 .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386"
 _btxld=		usr.sbin/btxld
 .endif
 .endif
 
 # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures
 # resulting from missing bug fixes or ELF Toolchain updates.
 .if ${MK_CDDL} != "no"
 _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \
     cddl/usr.bin/ctfmerge
 .endif
 
 # If we're given an XAS, don't build binutils.
 # (XAS counts as "given" when it contains an absolute path.)
 .if ${XAS:M/*} == ""
 .if ${MK_BINUTILS_BOOTSTRAP} != "no"
 _binutils=	gnu/usr.bin/binutils
 .endif
 .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no"
 _elftctools=	lib/libelftc \
 		usr.bin/elfcopy \
 		usr.bin/nm \
 		usr.bin/size \
 		usr.bin/strings
 # These are not required by the build, but can be useful for developers who
 # cross-build on a FreeBSD 10 host:
 _elftctools+=	usr.bin/addr2line
 .endif
 .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no"
 # If cross-building with an external binutils we still need to build strip for
 # the target (for at least crunchide).
 _elftctools=	lib/libelftc \
 		usr.bin/elfcopy
 .endif
 
 # If a full path to an external cross compiler is given, don't build
 # a cross compiler.
 .if ${XCC:N${CCACHE_BIN}:M/*} == "" && ${MK_CROSS_COMPILER} != "no"
 .if ${MK_CLANG_BOOTSTRAP} != "no"
 _clang=		usr.bin/clang
 _clang_libs=	lib/clang
 .endif
 .if ${MK_GCC_BOOTSTRAP} != "no"
 _cc=		gnu/usr.bin/cc
 .endif
 .endif
 .if ${MK_USB} != "no"
 _usb_tools=	sys/boot/usb/tools
 .endif
 
 # Each selected directory is built with obj, depend, all and install;
 # the results are installed under ${MAKEOBJDIRPREFIX}.
 # NOTE(review): _crunchide is not assigned anywhere in the visible part
 # of this file (only _crunch is); confirm whether it is still set.
 cross-tools: .MAKE .PHONY
 .for _tool in \
     ${_clang_libs} \
     ${_clang} \
     ${_binutils} \
     ${_elftctools} \
     ${_dtrace_tools} \
     ${_cc} \
     ${_btxld} \
     ${_crunchide} \
     ${_usb_tools}
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_tool}; \
 		${MAKE} DIRPRFX=${_tool}/ obj; \
 		${MAKE} DIRPRFX=${_tool}/ depend; \
 		${MAKE} DIRPRFX=${_tool}/ all; \
 		${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX} install
 .endfor
 
 # Settings for native-xtools.  Objects go to ${OBJTREE}/nxb and the
 # installed tools to ${NXBDESTDIR}.  NXBMAKE builds for
 # ${TARGET}/${TARGET_ARCH} with -DNO_PIC and -DNO_SHARED and with
 # tests, man pages, profiling, CTF, debug files and the optional
 # clang/lldb/gdb pieces switched off.
 NXBDESTDIR=	${OBJTREE}/nxb-bin
 NXBENV=		MAKEOBJDIRPREFIX=${OBJTREE}/nxb \
 		INSTALL="sh ${.CURDIR}/tools/install.sh" \
 		PATH=${PATH}:${OBJTREE}/gperf_for_gcc/usr/bin
 NXBMAKE=	${NXBENV} ${MAKE} \
 		TBLGEN=${NXBDESTDIR}/usr/bin/tblgen \
 		CLANG_TBLGEN=${NXBDESTDIR}/usr/bin/clang-tblgen \
 		MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} \
 		MK_GDB=no MK_TESTS=no \
 		SSP_CFLAGS= \
 		MK_HTML=no NO_LINT=yes MK_MAN=no \
 		-DNO_PIC MK_PROFILE=no -DNO_SHARED \
 		-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
 		MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
 		MK_LLDB=no MK_DEBUG_FILES=no
 
 # native-xtools is the current target for qemu-user cross builds of ports
 # via poudriere and the imgact_binmisc kernel module.
 # For non-clang enabled targets that are still using the in tree gcc
 # we must build a gperf binary for one instance of its Makefiles.  On
 # clang-enabled systems, the gperf binary is obsolete.
 #
 # Steps: optionally build gperf into ${OBJTREE}/gperf_for_gcc, create
 # the ${NXBDESTDIR} directory layout with mtree, then build and install
 # every listed tool with ${NXBMAKE}.
 native-xtools: .PHONY
 .if ${MK_GCC_BOOTSTRAP} != "no"
 	mkdir -p ${OBJTREE}/gperf_for_gcc/usr/bin
 	${_+_}@${ECHODIR} "===> ${_gperf} (obj,depend,all,install)"; \
 	cd ${.CURDIR}/${_gperf}; \
 	${NXBMAKE} DIRPRFX=${_gperf}/ obj; \
 	${NXBMAKE} DIRPRFX=${_gperf}/ depend; \
 	${NXBMAKE} DIRPRFX=${_gperf}/ all; \
 	${NXBMAKE} DIRPRFX=${_gperf}/ DESTDIR=${OBJTREE}/gperf_for_gcc install
 .endif
 	mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${NXBDESTDIR}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${NXBDESTDIR}/usr/include >/dev/null
 .if ${MK_DEBUG_FILES} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \
 	    -p ${NXBDESTDIR}/usr/lib >/dev/null
 .endif
 .for _tool in \
     bin/cat \
     bin/chmod \
     bin/cp \
     bin/csh \
     bin/echo \
     bin/expr \
     bin/hostname \
     bin/ln \
     bin/ls \
     bin/mkdir \
     bin/mv \
     bin/ps \
     bin/realpath \
     bin/rm \
     bin/rmdir \
     bin/sh \
     bin/sleep \
     ${_clang_tblgen} \
     usr.bin/ar \
     ${_binutils} \
     ${_elftctools} \
     ${_cc} \
     ${_gcc_tools} \
     ${_clang_libs} \
     ${_clang} \
     sbin/md5 \
     sbin/sysctl \
     gnu/usr.bin/diff \
     usr.bin/awk \
     usr.bin/basename \
     usr.bin/bmake \
     usr.bin/bzip2 \
     usr.bin/cmp \
     usr.bin/dirname \
     usr.bin/env \
     usr.bin/fetch \
     usr.bin/find \
     usr.bin/grep \
     usr.bin/gzip \
     usr.bin/id \
     usr.bin/lex \
     usr.bin/lorder \
     usr.bin/mktemp \
     usr.bin/mt \
     usr.bin/patch \
     usr.bin/sed \
     usr.bin/sort \
     usr.bin/tar \
     usr.bin/touch \
     usr.bin/tr \
     usr.bin/true \
     usr.bin/uniq \
     usr.bin/unzip \
     usr.bin/xargs \
     usr.bin/xinstall \
     usr.bin/xz \
     usr.bin/yacc \
     usr.sbin/chown
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_tool}; \
 		${NXBMAKE} DIRPRFX=${_tool}/ obj; \
 		${NXBMAKE} DIRPRFX=${_tool}/ depend; \
 		${NXBMAKE} DIRPRFX=${_tool}/ all; \
 		${NXBMAKE} DIRPRFX=${_tool}/ DESTDIR=${NXBDESTDIR} install
 .endfor
 
 #
 # hierarchy - ensure that all the needed directories are present
 #
 # Both names run the distrib-dirs target in etc/ through ${HMAKE}.
 #
 hierarchy hier: .MAKE .PHONY
 	${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs
 
 #
 # libraries - build all libraries, and install them under ${DESTDIR}.
 #
 # The list of libraries with dependents (${_prebuild_libs}) and their
 # interdependencies (__L) are built automatically by the
 # ${.CURDIR}/tools/make_libdeps.sh script.
 #
 # The work is split into four stages, each run as its own sub-make of
 # Makefile.inc1 from ${.CURDIR}, in this order: _prereq_libs,
 # _startup_libs, _prebuild_libs, _generic_libs.
 #
 libraries: .MAKE .PHONY
 	${_+_}cd ${.CURDIR}; \
 	    ${MAKE} -f Makefile.inc1 _prereq_libs; \
 	    ${MAKE} -f Makefile.inc1 _startup_libs; \
 	    ${MAKE} -f Makefile.inc1 _prebuild_libs; \
 	    ${MAKE} -f Makefile.inc1 _generic_libs
 
 #
 # static libgcc.a prerequisite for shared libc
 #
 _prereq_libs= gnu/lib/libssp/libssp_nonshared gnu/lib/libgcc lib/libcompiler_rt
 
 # These dependencies are not automatically generated:
 #
 # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before
 # all shared libraries for ELF.
 #
 _startup_libs=	gnu/lib/csu
 _startup_libs+=	lib/csu
 _startup_libs+=	gnu/lib/libgcc
 _startup_libs+=	lib/libcompiler_rt
 _startup_libs+=	lib/libc
 _startup_libs+=	lib/libc_nonshared
 .if ${MK_LIBCPLUSPLUS} != "no"
 _startup_libs+=	lib/libcxxrt
 .endif
 
 gnu/lib/libgcc__L: lib/libc__L
 gnu/lib/libgcc__L: lib/libc_nonshared__L
 .if ${MK_LIBCPLUSPLUS} != "no"
 lib/libcxxrt__L: gnu/lib/libgcc__L
 .endif
 
 _prebuild_libs=	${_kerberos5_lib_libasn1} \
 		${_kerberos5_lib_libhdb} \
 		${_kerberos5_lib_libheimbase} \
 		${_kerberos5_lib_libheimntlm} \
 		${_libsqlite3} \
 		${_kerberos5_lib_libheimipcc} \
 		${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \
 		${_kerberos5_lib_libroken} \
 		${_kerberos5_lib_libwind} \
 		lib/libbz2 ${_libcom_err} lib/libcrypt \
 		lib/libelf lib/libexpat \
 		lib/libfigpar \
 		${_lib_libgssapi} \
 		lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \
 		${_lib_libcapsicum} \
 		lib/ncurses/ncurses lib/ncurses/ncursesw \
 		lib/libopie lib/libpam ${_lib_libthr} \
 		${_lib_libradius} lib/libsbuf lib/libtacplus \
 		lib/libgeom \
 		${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \
 		${_cddl_lib_libuutil} \
 		${_cddl_lib_libavl} \
 		${_cddl_lib_libzfs_core} \
 		${_cddl_lib_libctf} \
 		lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \
 		${_secure_lib_libcrypto} ${_lib_libldns} \
 		${_secure_lib_libssh} ${_secure_lib_libssl} \
 		gnu/lib/libdialog
 .if ${MK_GNUCXX} != "no"
 _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++
 gnu/lib/libstdc++__L: lib/msun__L
 gnu/lib/libsupc++__L: gnu/lib/libstdc++__L
 .endif
 
 .if ${MK_LIBCPLUSPLUS} != "no"
 _prebuild_libs+= lib/libc++
 .endif
 
 lib/libgeom__L: lib/libexpat__L
 lib/libkvm__L: lib/libelf__L
 
 .if ${MK_LIBTHR} != "no"
 _lib_libthr=	lib/libthr
 .endif
 
 .if ${MK_RADIUS_SUPPORT} != "no"
 _lib_libradius=	lib/libradius
 .endif
 
 .if ${MK_OFED} != "no"
 _ofed_lib=	contrib/ofed/usr.lib/
 .endif
 
 .if ${MK_CASPER} != "no"
 _lib_libcapsicum=lib/libcapsicum
 .endif
 
 lib/libcapsicum__L: lib/libnv__L
 lib/libpjdlog__L: lib/libutil__L
 lib/liblzma__L: lib/libthr__L
 
 _generic_libs=	${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib ${_ofed_lib}
 .for _DIR in ${LOCAL_LIB_DIRS}
 .if exists(${.CURDIR}/${_DIR}/Makefile)
 _generic_libs+= ${_DIR}
 .endif
 .endfor
 
 lib/libopie__L lib/libtacplus__L: lib/libmd__L
 
 .if ${MK_CDDL} != "no"
 _cddl_lib_libumem= cddl/lib/libumem
 _cddl_lib_libnvpair= cddl/lib/libnvpair
 _cddl_lib_libavl= cddl/lib/libavl
 _cddl_lib_libuutil= cddl/lib/libuutil
 _cddl_lib_libzfs_core= cddl/lib/libzfs_core
 _cddl_lib_libctf= cddl/lib/libctf
 _cddl_lib= cddl/lib
 cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L
 cddl/lib/libzfs__L: lib/libgeom__L
 cddl/lib/libctf__L: lib/libz__L
 .endif
 # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built
 # on select architectures though (see cddl/lib/Makefile)
 .if ${MACHINE_CPUARCH} != "sparc64"
 _prebuild_libs+=	lib/libproc lib/librtld_db
 .endif
 
 .if ${MK_CRYPT} != "no"
 .if ${MK_OPENSSL} != "no"
 _secure_lib_libcrypto= secure/lib/libcrypto
 _secure_lib_libssl= secure/lib/libssl
 lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L
 .if ${MK_LDNS} != "no"
 _lib_libldns= lib/libldns
 lib/libldns__L: secure/lib/libcrypto__L
 .endif
 .if ${MK_OPENSSH} != "no"
 _secure_lib_libssh= secure/lib/libssh
 secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L
 .if ${MK_LDNS} != "no"
 secure/lib/libssh__L: lib/libldns__L
 .endif
 .if ${MK_KERBEROS_SUPPORT} != "no"
 secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \
     kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \
     lib/libmd__L kerberos5/lib/libroken__L
 .endif
 .endif
 .endif
 _secure_lib=	secure/lib
 .endif
 
 .if ${MK_KERBEROS} != "no"
 kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L
 kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \
     kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \
     kerberos5/lib/libwind__L lib/libsqlite3__L
 kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \
     kerberos5/lib/libroken__L lib/libcom_err__L
 kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \
     secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L
 kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \
     lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \
     kerberos5/lib/libroken__L kerberos5/lib/libwind__L \
     kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L
 kerberos5/lib/libroken__L: lib/libcrypt__L
 kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L
 kerberos5/lib/libheimbase__L: lib/libthr__L
 kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L
 .endif
 
 lib/libsqlite3__L: lib/libthr__L
 
 .if ${MK_GSSAPI} != "no"
 _lib_libgssapi=	lib/libgssapi
 .endif
 
 .if ${MK_KERBEROS} != "no"
 _kerberos5_lib=	kerberos5/lib
 _kerberos5_lib_libasn1= kerberos5/lib/libasn1
 _kerberos5_lib_libhdb= kerberos5/lib/libhdb
 _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase
 _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5
 _kerberos5_lib_libhx509= kerberos5/lib/libhx509
 _kerberos5_lib_libroken= kerberos5/lib/libroken
 _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm
 _libsqlite3= lib/libsqlite3
 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc
 _kerberos5_lib_libwind= kerberos5/lib/libwind
 _libcom_err= lib/libcom_err
 .endif
 
 .if ${MK_NIS} != "no"
 _lib_libypclnt=	lib/libypclnt
 .endif
 
 .if ${MK_OPENSSL} == "no"
 lib/libradius__L: lib/libmd__L
 .endif
 
 lib/libproc__L: \
     ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L
 .if ${MK_CXX} != "no"
 .if ${MK_LIBCPLUSPLUS} != "no"
 lib/libproc__L: lib/libcxxrt__L
 .else # This implies MK_GNUCXX != "no"; see lib/libproc
 lib/libproc__L: gnu/lib/libsupc++__L
 .endif
 .endif
 
 gnu/lib/libdialog__L: lib/msun__L lib/ncurses/ncursesw__L
 
 # Generate one <dir>__PL target for every entry of ${_prereq_libs}.
 # Each target runs obj, depend, all and install in the library directory with
 # MK_TESTS=no; the all and install steps additionally pass MK_PROFILE=no and
 # -DNO_PIC.  A directory that does not exist under ${.CURDIR} gets a target
 # with no commands.
 .for _lib in ${_prereq_libs}
 ${_lib}__PL: .PHONY .MAKE
 .if exists(${.CURDIR}/${_lib})
 	${_+_}@${ECHODIR} "===> ${_lib} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_lib}; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ depend; \
 		${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \
 		    DIRPRFX=${_lib}/ all; \
 		${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \
 		    DIRPRFX=${_lib}/ install
 .endif
 .endfor
 
 # Generate one <dir>__L target for every startup, prebuild and generic
 # library directory.  lib/libpam is filtered out of the prebuild list here
 # (:Nlib/libpam) because it has a hand-written rule of its own.  Each target
 # runs obj, depend, all and install in the directory with MK_TESTS=no; a
 # directory that does not exist under ${.CURDIR} gets a target with no
 # commands.
 .for _lib in ${_startup_libs} ${_prebuild_libs:Nlib/libpam} ${_generic_libs}
 ${_lib}__L: .PHONY .MAKE
 .if exists(${.CURDIR}/${_lib})
 	${_+_}@${ECHODIR} "===> ${_lib} (obj,depend,all,install)"; \
 		cd ${.CURDIR}/${_lib}; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ depend; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \
 		${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install
 .endif
 .endfor
 
 # libpam is special: we need to build static PAM modules before
 # static PAM library, and dynamic PAM library before dynamic PAM
 # modules.
 #
 # This rule is the same obj/depend/all/install sequence used for the other
 # __L targets, except that the all and install steps are run with
 # -D_NO_LIBPAM_SO_YET defined.
 lib/libpam__L: .PHONY .MAKE
 	${_+_}@${ECHODIR} "===> lib/libpam (obj,depend,all,install)"; \
 		cd ${.CURDIR}/lib/libpam; \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ obj; \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ depend; \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ \
 		    -D_NO_LIBPAM_SO_YET all; \
 		${MAKE} MK_TESTS=no DIRPRFX=lib/libpam/ \
 		    -D_NO_LIBPAM_SO_YET install
 
 # Stage targets invoked by "libraries".  Each one depends on the per-directory
 # targets of its list: the directory name with __PL appended for the
 # prerequisite libraries, and with __L appended for the other three lists.
 _prereq_libs: ${_prereq_libs:S/$/__PL/}
 _startup_libs: ${_startup_libs:S/$/__L/}
 _prebuild_libs: ${_prebuild_libs:S/$/__L/}
 _generic_libs: ${_generic_libs:S/$/__L/}
 
 # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from
 # 'everything' with _PARALLEL_SUBDIR_OK set.  This is because it is unlikely
 # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE
 # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in
 # parallel.  This is safe for the world stage of buildworld though since it has
 # already built libraries in a proper order and installed includes into
 # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to
 # avoid trashing a system if it crashes mid-install.
 .if !make(all) || defined(_PARALLEL_SUBDIR_OK)
 SUBDIR_PARALLEL=
 .endif
 
 .include <bsd.subdir.mk>
 
 .if make(check-old) || make(check-old-dirs) || \
     make(check-old-files) || make(check-old-libs) || \
     make(delete-old) || make(delete-old-dirs) || \
     make(delete-old-files) || make(delete-old-libs)
 
 #
 # check for / delete old files section
 #
 
 .include "ObsoleteFiles.inc"
 
 OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \
 else you can not start such an application. Consult UPDATING for more \
 information regarding how to cope with the removal/revision bump of a \
 specific library."
 
 .if !defined(BATCH_DELETE_OLD_FILES)
 RM_I=-i
 .else
 RM_I=-v
 .endif
 
 # delete-old-files - remove the files named in OLD_FILES from ${DESTDIR}.
 #
 # The list is obtained by re-running this makefile with -V, so that it is
 # streamed one name per line instead of being passed as arguments.  The second
 # -V also yields every OLD_FILES entry matching usr/share/*.gz with its last
 # suffix removed.  For each name the schg flag is cleared (best effort) and the
 # file is removed with ${RM_I}; once the file itself is gone, a matching
 # .debug or .symbols file under ${DESTDIR}${DEBUGDIR} is removed as well.
 # File descriptor 3 is a copy of the original stdin, so that rm reads its
 # answers from there and not from the pipe feeding the loop.
 delete-old-files:
 	@echo ">>> Removing old files (only deletes safe to delete libs)"
 # Ask for every old file if the user really wants to remove it.
 # It's annoying, but better safe than sorry.
 # NB: We cannot pass the list of OLD_FILES as a parameter because the
 # argument list will get too long. Using .for/.endfor make "loops" will make
 # the Makefile parser segfault.
 	@exec 3<&0; \
 	cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \
 			rm ${RM_I} "${DESTDIR}/$${file}" <&3; \
 		fi; \
 		for ext in debug symbols; do \
 		  if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \
 		      "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \
 			  rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \
 			      <&3; \
 		  fi; \
 		done; \
 	done
 # Remove catpages without corresponding manpages.
 # sed prints every catpage path twice: once unchanged and once rewritten to
 # the manpage location, so the loop reads the two lines as a pair.  The
 # catpage path is quoted when handed to rm so that a name containing
 # whitespace or glob characters is removed as exactly one file.
 	@exec 3<&0; \
 	find ${DESTDIR}/usr/share/man/cat* ! -type d | \
 	sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \
 	while read catpage; do \
 		read manpage; \
 		if [ ! -e "$${manpage}" ]; then \
 			rm ${RM_I} "$${catpage}" <&3; \
 	        fi; \
 	done
 	@echo ">>> Old files removed"
 
 # check-old-files - list, without removing anything, every OLD_FILES entry
 # that still exists under ${DESTDIR} as a regular file or symlink, together
 # with any matching .debug or .symbols file under ${DESTDIR}${DEBUGDIR}.
 check-old-files:
 	@echo ">>> Checking for old files"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			echo "${DESTDIR}/$${file}"; \
 		fi; \
 		for ext in debug symbols; do \
 		  if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \
 			  echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \
 		  fi; \
 		done; \
 	done
 # Check for catpages without corresponding manpages.
 # sed prints every catpage path twice: once unchanged and once rewritten to
 # the manpage location, so the loop reads the two lines as a pair.  The
 # catpage path is quoted so that it is reported exactly as found, without
 # field splitting or glob expansion.
 	@find ${DESTDIR}/usr/share/man/cat* ! -type d | \
 	sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \
 	while read catpage; do \
 		read manpage; \
 		if [ ! -e "$${manpage}" ]; then \
 			echo "$${catpage}"; \
 	        fi; \
 	done
 
 # delete-old-libs - remove the libraries named in OLD_LIBS from ${DESTDIR}.
 #
 # ${OLD_LIBS_MESSAGE} is printed first, reflowed by fmt.  The list is obtained
 # by re-running this makefile with -V OLD_LIBS and is processed one name per
 # line: the schg flag is cleared (best effort), the library is removed with
 # ${RM_I}, and once the library itself is gone a matching .debug or .symbols
 # file under ${DESTDIR}${DEBUGDIR} is removed too.  File descriptor 3 is a
 # copy of the original stdin, so that rm reads its answers from there and not
 # from the pipe feeding the loop.
 delete-old-libs:
 	@echo ">>> Removing old libraries"
 	@echo "${OLD_LIBS_MESSAGE}" | fmt
 	@exec 3<&0; \
 	cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_LIBS | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \
 			rm ${RM_I} "${DESTDIR}/$${file}" <&3; \
 		fi; \
 		for ext in debug symbols; do \
 		  if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \
 		      "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \
 			  rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \
 			      <&3; \
 		  fi; \
 		done; \
 	done
 	@echo ">>> Old libraries removed"
 
 # check-old-libs - list, without removing anything, every OLD_LIBS entry that
 # still exists under ${DESTDIR} as a regular file or symlink, together with
 # any matching .debug or .symbols file under ${DESTDIR}${DEBUGDIR}.
 check-old-libs:
 	@echo ">>> Checking for old libraries"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_LIBS | xargs -n1 | \
 	while read file; do \
 		if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \
 			echo "${DESTDIR}/$${file}"; \
 		fi; \
 		for ext in debug symbols; do \
 		  if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \
 			  echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \
 		  fi; \
 		done; \
 	done
 
 # delete-old-dirs - remove the directories named in OLD_DIRS from ${DESTDIR}.
 #
 # The list is reverse-sorted before use, which puts a subdirectory ahead of
 # the directory that contains it.  Each existing directory is removed with
 # rmdir; a failing rmdir is ignored (|| true).  An entry that is a symlink
 # is only reported, never removed.
 delete-old-dirs:
 	@echo ">>> Removing old directories"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_DIRS | xargs -n1 | sort -r | \
 	while read dir; do \
 		if [ -d "${DESTDIR}/$${dir}" ]; then \
 			rmdir -v "${DESTDIR}/$${dir}" || true; \
 		elif [ -L "${DESTDIR}/$${dir}" ]; then \
 			echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \
 		fi; \
 	done
 	@echo ">>> Old directories removed"
 
 # check-old-dirs - list, without removing anything, every OLD_DIRS entry that
 # still exists under ${DESTDIR} as a directory; an entry that is a symlink is
 # reported with a note asking for manual removal.
 check-old-dirs:
 	@echo ">>> Checking for old directories"
 	@cd ${.CURDIR}; \
 	${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \
 	    -V OLD_DIRS | xargs -n1 | \
 	while read dir; do \
 		if [ -d "${DESTDIR}/$${dir}" ]; then \
 			echo "${DESTDIR}/$${dir}"; \
 		elif [ -L "${DESTDIR}/$${dir}" ]; then \
 			echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \
 		fi; \
 	done
 
 # delete-old - remove old files and old directories.  Old libraries are not
 # touched here; the rule only prints how to remove them.
 delete-old: delete-old-files delete-old-dirs
 	@echo "To remove old libraries run '${MAKE} delete-old-libs'."
 
 # check-old - run all three check-old-* listings, then print how to remove
 # what was found.
 check-old: check-old-files check-old-libs check-old-dirs
 	@echo "To remove old files and directories run '${MAKE} delete-old'."
 	@echo "To remove old libraries run '${MAKE} delete-old-libs'."
 
 .endif
 
 #
 # showconfig - show build configuration.
 #
 # Runs make in no-execute mode on sys/conf/kern.opts.mk and on
 # share/mk/src.opts.mk with -V dummy -dg1, merges stdout and stderr of both
 # runs, keeps only the lines that start with MK_, and prints them sorted with
 # duplicates removed.
 #
 showconfig:
 	@(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1; \
 	  ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1) 2>&1 | grep ^MK_ | sort -u
 
 .if !empty(KRNLOBJDIR) && !empty(KERNCONF)
 DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/
 
 .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE)
 .if exists(${KERNCONFDIR}/${KERNCONF})
 FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \
 	'${KERNCONFDIR}/${KERNCONF}' ; echo
 .endif
 .endif
 
 .endif
 
 .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH})
 DTBOUTPUTPATH= ${.CURDIR}
 .endif
 
 #
 # Build 'standalone' Device Tree Blob
 #
 # Runs sys/tools/fdt/make_dtb.sh with PATH set to ${TMPPATH} and MACHINE set
 # to ${TARGET}.  The script receives three arguments: the sys directory,
 # ${FDT_DTS_FILE} and ${DTBOUTPUTPATH}.
 #
 builddtb:
 	@PATH=${TMPPATH} MACHINE=${TARGET} \
 	${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \
 	    "${FDT_DTS_FILE}" ${DTBOUTPUTPATH}
 
 ###############
 
 # cleanworld
 # In the following, the first 'rm' in a series will usually remove all
 # files and directories.  If it does not, then there are probably some
 # files with file flags set, so this unsets them and tries the 'rm' a
 # second time.  There are situations where this target will be cleaning
 # some directories via more than one method, but that duplication is
 # needed to correctly handle all the possible situations.  Removing all
 # files without file flags set in the first 'rm' instance saves time,
 # because 'chflags' will need to operate on fewer files afterwards.
 #
 # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be
 # created by bsd.obj.mk, except that we don't want to .include that file
 # in this makefile.
 #
 BW_CANONICALOBJDIR:=${OBJTREE}${.CURDIR}
 cleanworld: .PHONY
 # When the canonical object directory exists: remove its contents, clear all
 # file flags recursively, then remove again.  The first rm and the chflags
 # carry a leading '-' so their failures are ignored; the final rm is not
 # prefixed and therefore must succeed.
 .if exists(${BW_CANONICALOBJDIR}/)
 	-rm -rf ${BW_CANONICALOBJDIR}/*
 	-chflags -R 0 ${BW_CANONICALOBJDIR}
 	rm -rf ${BW_CANONICALOBJDIR}/*
 .endif
 # When objects are being built in the source directory itself or in its obj
 # subdirectory, additionally run cleandir from ${.CURDIR}.
 .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR}
 	#   To be safe in this case, fall back to a 'make cleandir'
 	${_+_}@cd ${.CURDIR}; ${MAKE} cleandir
 .endif
 
 .if defined(TARGET) && defined(TARGET_ARCH)
 
 .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH}
 XDEV_CPUTYPE?=${CPUTYPE}
 .else
 XDEV_CPUTYPE?=${TARGET_CPUTYPE}
 .endif
 
 NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \
 	MK_MAN=no MK_NLS=no MK_PROFILE=no \
 	MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \
 	TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
 	CPUTYPE=${XDEV_CPUTYPE}
 
 XDDIR=${TARGET_ARCH}-freebsd
 XDTP?=/usr/${XDDIR}
 .if ${XDTP:N/*}
 .error XDTP variable should be an absolute path
 .endif
 
 CDBENV=MAKEOBJDIRPREFIX=${MAKEOBJDIRPREFIX}/${XDDIR} \
 	INSTALL="sh ${.CURDIR}/tools/install.sh"
 CDENV= ${CDBENV} \
 	TOOLS_PREFIX=${XDTP}
 CD2CFLAGS=-isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib \
 	--sysroot=${XDDESTDIR}/ -B${XDDESTDIR}/usr/libexec \
 	-B${XDDESTDIR}/usr/bin -B${XDDESTDIR}/usr/lib
 CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CFLAGS}" \
 	CPP="${CPP} ${CD2CFLAGS}" \
 	MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH}
 
 CDTMP=	${MAKEOBJDIRPREFIX}/${XDDIR}/${.CURDIR}/tmp
 CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${NOFUN}
 CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} ${MAKE} ${NOFUN}
 XDDESTDIR=${DESTDIR}/${XDTP}
 .if !defined(OSREL)
 OSREL!= uname -r | sed -e 's/[-(].*//'
 .endif
 
 # xdev - build and then install the cross-development tools.  The .ORDER
 # lines fix the sequence in which the listed targets are made; note that
 # xdev-links is ordered after xdev-install but is not a dependency of xdev.
 .ORDER: xdev-build xdev-install xdev-links
 xdev: xdev-build xdev-install
 
 # xdev-build - the four build stages, made in the order listed.
 .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools
 xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools
 
 # _xb-worldtmp - create ${CDTMP}/usr and populate it from BSD.usr.dist with
 # mtree; mtree's standard output is discarded.
 _xb-worldtmp: .PHONY
 	mkdir -p ${CDTMP}/usr
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${CDTMP}/usr >/dev/null
 
 # _xb-bootstrap-tools - for every directory in ${_clang_tblgen} and ${_gperf}
 # (either may expand to nothing), run obj, depend, all and install with
 # ${CDMAKE}.  The install step goes to ${CDTMP}.
 _xb-bootstrap-tools: .PHONY
 .for _tool in \
     ${_clang_tblgen} \
     ${_gperf}
 	${_+_}@${ECHODIR} "===> ${_tool} (obj,depend,all,install)"; \
 	cd ${.CURDIR}/${_tool}; \
 	${CDMAKE} DIRPRFX=${_tool}/ obj; \
 	${CDMAKE} DIRPRFX=${_tool}/ depend; \
 	${CDMAKE} DIRPRFX=${_tool}/ all; \
 	${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install
 .endfor
 
 # _xb-build-tools - run the build-tools target of Makefile.inc1 from
 # ${.CURDIR}, with the ${CDBENV} environment and the ${NOFUN} settings.
 _xb-build-tools: .PHONY
 	${_+_}@cd ${.CURDIR}; \
 	${CDBENV} ${MAKE} -f Makefile.inc1 ${NOFUN} build-tools
 
 # _xb-cross-tools - for every cross-tool directory in the list below, run
 # obj, depend and all with ${CDMAKE}.  Nothing is installed here; the same
 # list is installed by _xi-cross-tools.
 _xb-cross-tools: .PHONY
 .for _tool in \
     ${_binutils} \
     ${_elftctools} \
     usr.bin/ar \
     ${_clang_libs} \
     ${_clang} \
     ${_cc}
 	${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,depend,all)"; \
 	cd ${.CURDIR}/${_tool}; \
 	${CDMAKE} DIRPRFX=${_tool}/ obj; \
 	${CDMAKE} DIRPRFX=${_tool}/ depend; \
 	${CDMAKE} DIRPRFX=${_tool}/ all
 .endfor
 
 # _xi-mtree - create ${XDDESTDIR} and populate its directory layout with
 # mtree from the root, usr and include specifications.  The lib32
 # specification is applied when MK_LIB32 is not "no", and the tests
 # specification (under ${XDDESTDIR}${TESTSBASE}) when MK_TESTS is not "no".
 _xi-mtree: .PHONY
 	${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}"
 	mkdir -p ${XDDESTDIR}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \
 	    -p ${XDDESTDIR} >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \
 	    -p ${XDDESTDIR}/usr >/dev/null
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \
 	    -p ${XDDESTDIR}/usr/include >/dev/null
 .if ${MK_LIB32} != "no"
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib32.dist \
 	    -p ${XDDESTDIR}/usr >/dev/null
 .endif
 .if ${MK_TESTS} != "no"
 	mkdir -p ${XDDESTDIR}${TESTSBASE}
 	mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \
 	    -p ${XDDESTDIR}${TESTSBASE} >/dev/null
 .endif
 
 # xdev-install - depends on xdev-build and on the four install stages, made
 # in the order listed.
 .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries
 xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries
 
 # _xi-cross-tools - for every cross-tool directory in the list below, run the
 # install target with ${CDMAKE} and DESTDIR=${XDDESTDIR}.
 _xi-cross-tools: .PHONY
 	@echo "_xi-cross-tools"
 .for _tool in \
     ${_binutils} \
     ${_elftctools} \
     usr.bin/ar \
     ${_clang_libs} \
     ${_clang} \
     ${_cc}
 	${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \
 	cd ${.CURDIR}/${_tool}; \
 	${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR}
 .endfor
 
 # _xi-includes - run the includes target of Makefile.inc1 with ${CD2MAKE},
 # installing into ${XDDESTDIR}.
 _xi-includes: .PHONY
 	${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \
 		DESTDIR=${XDDESTDIR}
 
 # _xi-libraries - run the libraries target of Makefile.inc1 with ${CD2MAKE},
 # installing into ${XDDESTDIR}.
 _xi-libraries: .PHONY
 	${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \
 		DESTDIR=${XDDESTDIR}
 
 # xdev-links - working from ${XDDESTDIR}/usr/bin, create ../../../../usr/bin
 # and, for every entry in the current directory, make two symlinks there:
 # ${XDDIR}-<name> and ${XDDIR}${OSREL}-<name>.  Both point at
 # ../../${XDTP}/usr/bin/<name>; ln -sf replaces a link that already exists.
 xdev-links: .PHONY
 	${_+_}cd ${XDDESTDIR}/usr/bin; \
 	mkdir -p ../../../../usr/bin; \
 		for i in *; do \
 			ln -sf ../../${XDTP}/usr/bin/$$i \
 			    ../../../../usr/bin/${XDDIR}-$$i; \
 			ln -sf ../../${XDTP}/usr/bin/$$i \
 			    ../../../../usr/bin/${XDDIR}${OSREL}-$$i; \
 		done
 .else
 # TARGET and/or TARGET_ARCH is not defined: report the problem and fail, so
 # that a calling make or script does not carry on as if the xdev target had
 # been built.
 xdev xdev-build xdev-install xdev-links:
 	@echo "*** Error: Both TARGET and TARGET_ARCH must be defined for \"${.TARGET}\" target"
 	@false
 .endif
Index: projects/release-pkg/contrib/llvm/projects/libunwind/src/AddressSpace.hpp
===================================================================
--- projects/release-pkg/contrib/llvm/projects/libunwind/src/AddressSpace.hpp	(revision 293335)
+++ projects/release-pkg/contrib/llvm/projects/libunwind/src/AddressSpace.hpp	(revision 293336)
@@ -1,597 +1,598 @@
 //===------------------------- AddressSpace.hpp ---------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is dual licensed under the MIT and the University of Illinois Open
 // Source Licenses. See LICENSE.TXT for details.
 //
 //
 // Abstracts accessing local vs remote address spaces.
 //
 //===----------------------------------------------------------------------===//
 
 #ifndef __ADDRESSSPACE_HPP__
 #define __ADDRESSSPACE_HPP__
 
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
 #ifndef _LIBUNWIND_IS_BAREMETAL
 #include <dlfcn.h>
 #endif
 
 #ifdef __APPLE__
 #include <mach-o/getsect.h>
 namespace libunwind {
    bool checkKeyMgrRegisteredFDEs(uintptr_t targetAddr, void *&fde);
 }
 #endif
 
 #include "libunwind.h"
 #include "config.h"
 #include "dwarf2.h"
 #include "Registers.hpp"
 
 #if _LIBUNWIND_ARM_EHABI
 #if defined(__FreeBSD__)
 
+#include <sys/link_elf.h>
 typedef void *_Unwind_Ptr;
 
 #elif defined(__linux__)
 
 typedef long unsigned int *_Unwind_Ptr;
 extern "C" _Unwind_Ptr __gnu_Unwind_Find_exidx(_Unwind_Ptr addr, int *len);
 
 // Emulate the BSD dl_unwind_find_exidx API when on a GNU libdl system.
 #define dl_unwind_find_exidx __gnu_Unwind_Find_exidx
 
 #elif !defined(_LIBUNWIND_IS_BAREMETAL)
 #include <link.h>
 #else // !defined(_LIBUNWIND_IS_BAREMETAL)
 // When statically linked on bare-metal, the symbols for the EH table are looked
 // up without going through the dynamic loader.
 struct EHTEntry {
   uint32_t functionOffset;
   uint32_t unwindOpcodes;
 };
 extern EHTEntry __exidx_start;
 extern EHTEntry __exidx_end;
 #endif // !defined(_LIBUNWIND_IS_BAREMETAL)
 #endif // _LIBUNWIND_ARM_EHABI
 
 #if defined(__CloudABI__) || defined(__FreeBSD__) || defined(__linux__)
 #if _LIBUNWIND_SUPPORT_DWARF_UNWIND && _LIBUNWIND_SUPPORT_DWARF_INDEX
 #include <link.h>
 // Macro for machine-independent access to the ELF program headers. This
 // macro is not available on some systems (e.g., FreeBSD). On these
 // systems the data structures are just called Elf_XXX. Define ElfW()
 // locally.
 #if !defined(ElfW)
 #define ElfW(type) Elf_##type
 #endif
 #include "EHHeaderParser.hpp"
 #endif
 #endif
 
 namespace libunwind {
 
 /// Used by findUnwindSections() to return info about needed sections.
 ///
 /// Which members exist depends on the unwinding mechanisms the library was
 /// configured with, so code that touches a member must sit under the same
 /// preprocessor condition as the member itself.
 struct UnwindInfoSections {
 #if _LIBUNWIND_SUPPORT_DWARF_UNWIND || _LIBUNWIND_SUPPORT_DWARF_INDEX ||       \
     _LIBUNWIND_SUPPORT_COMPACT_UNWIND
   // No dso_base for ARM EHABI.
   uintptr_t       dso_base;
 #endif
 #if _LIBUNWIND_SUPPORT_DWARF_UNWIND
   // Address and length of the DWARF unwind section.
   uintptr_t       dwarf_section;
   uintptr_t       dwarf_section_length;
 #endif
 #if _LIBUNWIND_SUPPORT_DWARF_INDEX
   // Address and length of the DWARF index section.
   uintptr_t       dwarf_index_section;
   uintptr_t       dwarf_index_section_length;
 #endif
 #if _LIBUNWIND_SUPPORT_COMPACT_UNWIND
   // Address and length of the compact unwind section.
   uintptr_t       compact_unwind_section;
   uintptr_t       compact_unwind_section_length;
 #endif
 #if _LIBUNWIND_ARM_EHABI
   // Address and length of the ARM EHABI section.
   uintptr_t       arm_section;
   uintptr_t       arm_section_length;
 #endif
 };
 
 
 /// LocalAddressSpace is the address-space type handed to UnwindCursor as a
 /// template parameter when the thread being unwound belongs to the current
 /// process.  The wrappers compile away, making local unwinds fast.
 ///
 /// Addresses are carried as pint_t, an unsigned integer that is 64 bits wide
 /// when __LP64__ is defined and 32 bits wide otherwise; sint_t is the signed
 /// type of the same width.
 class __attribute__((visibility("hidden"))) LocalAddressSpace {
 public:
 #ifdef __LP64__
   typedef uint64_t pint_t;
   typedef int64_t  sint_t;
 #else
   typedef uint32_t pint_t;
   typedef int32_t  sint_t;
 #endif

   // Fixed-width readers.  Each one copies sizeof(result) bytes starting at
   // addr into a local of the result type with memcpy and returns that local.
   uint8_t get8(pint_t addr) {
     uint8_t out;
     memcpy(&out, (void *)addr, sizeof(out));
     return out;
   }
   uint16_t get16(pint_t addr) {
     uint16_t out;
     memcpy(&out, (void *)addr, sizeof(out));
     return out;
   }
   uint32_t get32(pint_t addr) {
     uint32_t out;
     memcpy(&out, (void *)addr, sizeof(out));
     return out;
   }
   uint64_t get64(pint_t addr) {
     uint64_t out;
     memcpy(&out, (void *)addr, sizeof(out));
     return out;
   }
   double getDouble(pint_t addr) {
     double out;
     memcpy(&out, (void *)addr, sizeof(out));
     return out;
   }
   v128 getVector(pint_t addr) {
     v128 out;
     memcpy(&out, (void *)addr, sizeof(out));
     return out;
   }

   // Pointer-sized read.
   uintptr_t       getP(pint_t addr);

   // LEB128 decoders; both advance addr past the bytes they consume.
   static uint64_t getULEB128(pint_t &addr, pint_t end);
   static int64_t  getSLEB128(pint_t &addr, pint_t end);

   // Decodes one DW_EH_PE_* encoded pointer and advances addr past it.
   pint_t getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
                      pint_t datarelBase = 0);

   // Lookup services.
   bool findFunctionName(pint_t addr, char *buf, size_t bufLen,
                         unw_word_t *offset);
   bool findUnwindSections(pint_t targetAddr, UnwindInfoSections &info);
   bool findOtherFDE(pint_t targetAddr, pint_t &fde);

   static LocalAddressSpace sThisAddressSpace;
 };
 
 /// Read one pointer-sized value at \p addr: 64 bits when __LP64__ is
 /// defined, 32 bits otherwise.
 inline uintptr_t LocalAddressSpace::getP(pint_t addr) {
 #ifndef __LP64__
   return get32(addr);
 #else
   return get64(addr);
 #endif
 }
 
 /// Decode an unsigned LEB128 value into a 64-bit word.
 ///
 /// Reading begins at \p addr, and \p addr is updated to point just past the
 /// last byte consumed.  Aborts when \p end is reached before the terminating
 /// byte, or when a 7-bit group cannot be represented in the 64-bit result.
 inline uint64_t LocalAddressSpace::getULEB128(pint_t &addr, pint_t end) {
   const uint8_t *cursor = (uint8_t *)addr;
   const uint8_t *const limit = (uint8_t *)end;
   uint64_t value = 0;
   int shift = 0;
   bool more;
   do {
     if (cursor == limit)
       _LIBUNWIND_ABORT("truncated uleb128 expression");

     const uint64_t payload = *cursor & 0x7f;

     // Reject a group that starts beyond bit 63 or that would lose bits when
     // moved into position.
     if (shift >= 64 || payload << shift >> shift != payload) {
       _LIBUNWIND_ABORT("malformed uleb128 expression");
     } else {
       value |= payload << shift;
       shift += 7;
     }

     // The high bit of the byte just handled says whether another follows.
     more = *cursor >= 0x80;
     ++cursor;
   } while (more);
   addr = (pint_t) cursor;
   return value;
 }
 
 /// Read a SLEB128 into a 64-bit word.
 ///
 /// Reading begins at \p addr, and \p addr is updated to point just past the
 /// last byte consumed.  Aborts when \p end is reached before the terminating
 /// byte, or when the encoding continues beyond the 64 bits the result can
 /// hold.
 inline int64_t LocalAddressSpace::getSLEB128(pint_t &addr, pint_t end) {
   const uint8_t *p = (uint8_t *)addr;
   const uint8_t *pend = (uint8_t *)end;
   // Accumulate in an unsigned word: shifting bits into or past the sign bit of
   // a signed value is undefined behavior.
   uint64_t result = 0;
   int bit = 0;
   uint8_t byte;
   do {
     if (p == pend)
       _LIBUNWIND_ABORT("truncated sleb128 expression");
     // A shift count of 64 or more is undefined; treat an over-long encoding
     // the same way getULEB128() does.
     if (bit >= 64)
       _LIBUNWIND_ABORT("malformed sleb128 expression");
     byte = *p++;
     // Widen before shifting.  Shifted as a plain int, the group would be
     // corrupted as soon as it had to land at bit 32 or above.
     result |= (uint64_t)(byte & 0x7f) << bit;
     bit += 7;
   } while (byte & 0x80);
   // sign extend negative numbers
   if ((byte & 0x40) != 0 && bit < 64)
     result |= (~0ULL) << bit;
   addr = (pint_t) p;
   return (int64_t)result;
 }
 
 /// Decode one DW_EH_PE_* encoded pointer located at \p addr.
 ///
 /// The low four bits of \p encoding select how the value is stored; bits 0x70
 /// select what the value is relative to.  On return \p addr has been moved
 /// past the bytes that were consumed.  \p end is handed to the LEB128 readers
 /// only.  \p datarelBase is the base added for DW_EH_PE_datarel and must be
 /// non-zero when that encoding is used.  When DW_EH_PE_indirect is set, the
 /// computed address is read through once more with getP().  Unknown or
 /// unsupported encodings abort.
 inline LocalAddressSpace::pint_t
 LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
                                pint_t datarelBase) {
   // Address of the encoded field itself; DW_EH_PE_pcrel is relative to it.
   const pint_t fieldAddr = addr;
   pint_t value;

   // Step 1: fetch the stored value and step addr over it.
   switch (encoding & 0x0F) {
   case DW_EH_PE_ptr:
     value = getP(addr);
     addr += sizeof(pint_t);
     break;
   case DW_EH_PE_uleb128:
     value = (pint_t)getULEB128(addr, end);
     break;
   case DW_EH_PE_udata2:
     value = get16(addr);
     addr += 2;
     break;
   case DW_EH_PE_udata4:
     value = get32(addr);
     addr += 4;
     break;
   case DW_EH_PE_udata8:
     value = (pint_t)get64(addr);
     addr += 8;
     break;
   case DW_EH_PE_sleb128:
     value = (pint_t)getSLEB128(addr, end);
     break;
   case DW_EH_PE_sdata2:
     // Sign extend from signed 16-bit value.
     value = (pint_t)(int16_t)get16(addr);
     addr += 2;
     break;
   case DW_EH_PE_sdata4:
     // Sign extend from signed 32-bit value.
     value = (pint_t)(int32_t)get32(addr);
     addr += 4;
     break;
   case DW_EH_PE_sdata8:
     value = (pint_t)get64(addr);
     addr += 8;
     break;
   default:
     _LIBUNWIND_ABORT("unknown pointer encoding");
   }

   // Step 2: apply the base selected by the relative-to bits.
   switch (encoding & 0x70) {
   case DW_EH_PE_absptr:
     // Absolute value; nothing to add.
     break;
   case DW_EH_PE_pcrel:
     value += fieldAddr;
     break;
   case DW_EH_PE_textrel:
     _LIBUNWIND_ABORT("DW_EH_PE_textrel pointer encoding not supported");
     break;
   case DW_EH_PE_datarel:
     // DW_EH_PE_datarel is only valid in a few places, which is why the
     // parameter defaults to 0; reaching this case with a zero base means the
     // caller did not supply one.
     if (datarelBase == 0)
       _LIBUNWIND_ABORT("DW_EH_PE_datarel is invalid with a datarelBase of 0");
     value += datarelBase;
     break;
   case DW_EH_PE_funcrel:
     _LIBUNWIND_ABORT("DW_EH_PE_funcrel pointer encoding not supported");
     break;
   case DW_EH_PE_aligned:
     _LIBUNWIND_ABORT("DW_EH_PE_aligned pointer encoding not supported");
     break;
   default:
     _LIBUNWIND_ABORT("unknown pointer encoding");
     break;
   }

   // Step 3: optional extra level of indirection.
   if (encoding & DW_EH_PE_indirect)
     value = getP(value);

   return value;
 }
 
 #ifdef __APPLE__
   /// Result record filled in by _dyld_find_unwind_sections(): the Mach-O
   /// header of the image found, plus the address and length of its DWARF and
   /// compact unwind sections.
   struct dyld_unwind_sections
   {
     const struct mach_header*   mh;
     const void*                 dwarf_section;
     uintptr_t                   dwarf_section_length;
     const void*                 compact_unwind_section;
     uintptr_t                   compact_unwind_section_length;
   };
   #if (defined(__MAC_OS_X_VERSION_MIN_REQUIRED) \
                                  && (__MAC_OS_X_VERSION_MIN_REQUIRED >= 1070)) \
       || defined(__IPHONE_OS_VERSION_MIN_REQUIRED)
     // In 10.7.0 or later, libSystem.dylib implements this function.
     extern "C" bool _dyld_find_unwind_sections(void *, dyld_unwind_sections *);
   #else
     // In 10.6.x and earlier, we need to implement this functionality.
     //
     // Looks up the image containing addr with dladdr() and reports that
     // image's __TEXT,__eh_frame section.  Returns false when no image
     // contains addr or the image has no such section.  No compact unwind
     // section is ever reported by this fallback.
     static inline bool _dyld_find_unwind_sections(void* addr,
                                                     dyld_unwind_sections* info) {
       // Find mach-o image containing address.
       Dl_info dlinfo;
       if (!dladdr(addr, &dlinfo))
         return false;
       // The image header sits at the image's load address, which dladdr()
       // reports in dli_fbase.  dli_saddr is the address of the nearest symbol
       // and does not point at a mach_header.
       const mach_header *mh = (const mach_header *)dlinfo.dli_fbase;

       // Find dwarf unwind section in that image.
       unsigned long size;
       const uint8_t *p = getsectiondata(mh, "__TEXT", "__eh_frame", &size);
       if (!p)
         return false;

       // Fill in return struct.
       info->mh = mh;
       info->dwarf_section = p;
       info->dwarf_section_length = size;
       info->compact_unwind_section = 0;
       info->compact_unwind_section_length = 0;

       return true;
     }
   #endif
 #endif
 
 /// Locates the unwind information of the loaded image that contains
 /// targetAddr and records it in info.
 ///
 /// The lookup mechanism is selected at compile time:
 ///  - __APPLE__: ask _dyld_find_unwind_sections.
 ///  - _LIBUNWIND_ARM_EHABI: use the __exidx_start/__exidx_end symbols on bare
 ///    metal, otherwise dl_unwind_find_exidx.
 ///  - _LIBUNWIND_SUPPORT_DWARF_UNWIND: walk the loaded objects with
 ///    dl_iterate_phdr and decode each PT_GNU_EH_FRAME header.
 ///
 /// Returns true when sections were found, false otherwise.
 inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr,
                                                   UnwindInfoSections &info) {
 #ifdef __APPLE__
   dyld_unwind_sections dyldInfo;
   if (_dyld_find_unwind_sections((void *)targetAddr, &dyldInfo)) {
     // Copy the dyld answer into the platform-neutral struct.
     info.dso_base                      = (uintptr_t)dyldInfo.mh;
  #if _LIBUNWIND_SUPPORT_DWARF_UNWIND
     info.dwarf_section                 = (uintptr_t)dyldInfo.dwarf_section;
     info.dwarf_section_length          = dyldInfo.dwarf_section_length;
  #endif
     info.compact_unwind_section        = (uintptr_t)dyldInfo.compact_unwind_section;
     info.compact_unwind_section_length = dyldInfo.compact_unwind_section_length;
     return true;
   }
 #elif _LIBUNWIND_ARM_EHABI
  #ifdef _LIBUNWIND_IS_BAREMETAL
   // Bare metal is statically linked, so no need to ask the dynamic loader
   info.arm_section =        (uintptr_t)(&__exidx_start);
   info.arm_section_length = (uintptr_t)(&__exidx_end - &__exidx_start);
  #else
   // length stays 0 unless dl_unwind_find_exidx overwrites it.
   int length = 0;
   info.arm_section = (uintptr_t) dl_unwind_find_exidx(
       (_Unwind_Ptr) targetAddr, &length);
   info.arm_section_length = (uintptr_t)length;
  #endif
   _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %X length %x\n",
                              info.arm_section, info.arm_section_length);
   // Success requires both a non-null section and a non-zero length.
   if (info.arm_section && info.arm_section_length)
     return true;
 #elif _LIBUNWIND_SUPPORT_DWARF_UNWIND
 #if _LIBUNWIND_SUPPORT_DWARF_INDEX
   // State handed to the dl_iterate_phdr callback through its void* argument.
   struct dl_iterate_cb_data {
     LocalAddressSpace *addressSpace;
     UnwindInfoSections *sects;
     uintptr_t targetAddr;
   };
 
   dl_iterate_cb_data cb_data = {this, &info, targetAddr};
   // The callback is invoked once per loaded object.  Per dl_iterate_phdr(3),
   // the walk stops at the first non-zero callback result, and that value is
   // what dl_iterate_phdr returns.
   int found = dl_iterate_phdr(
       [](struct dl_phdr_info *pinfo, size_t, void *data) -> int {
         auto cbdata = static_cast<dl_iterate_cb_data *>(data);
         // object_length is only read when found_obj is true, i.e. after it
         // has been assigned in the PT_LOAD branch below.
         size_t object_length;
         bool found_obj = false;
         bool found_hdr = false;
 
         assert(cbdata);
         assert(cbdata->sects);
 
         // The target lies below this object's base address: not in this
         // object (false converts to 0, so the walk continues).
         if (cbdata->targetAddr < pinfo->dlpi_addr) {
           return false;
         }
 
 #if !defined(Elf_Half)
         typedef ElfW(Half) Elf_Half;
 #endif
 #if !defined(Elf_Phdr)
         typedef ElfW(Phdr) Elf_Phdr;
 #endif
 
         for (Elf_Half i = 0; i < pinfo->dlpi_phnum; i++) {
           const Elf_Phdr *phdr = &pinfo->dlpi_phdr[i];
           if (phdr->p_type == PT_LOAD) {
             // A loadable segment: check whether it covers the target.
             uintptr_t begin = pinfo->dlpi_addr + phdr->p_vaddr;
             uintptr_t end = begin + phdr->p_memsz;
             if (cbdata->targetAddr >= begin && cbdata->targetAddr < end) {
               // dso_base is set to the start of the matching segment.
               cbdata->sects->dso_base = begin;
               object_length = phdr->p_memsz;
               found_obj = true;
             }
           } else if (phdr->p_type == PT_GNU_EH_FRAME) {
             // NOTE(review): the sects fields below are written for every
             // object that has this header, even when none of its PT_LOAD
             // segments contains the target; on an overall miss info is left
             // holding values from the last object visited.
             EHHeaderParser<LocalAddressSpace>::EHHeaderInfo hdrInfo;
             uintptr_t eh_frame_hdr_start = pinfo->dlpi_addr + phdr->p_vaddr;
             cbdata->sects->dwarf_index_section = eh_frame_hdr_start;
             cbdata->sects->dwarf_index_section_length = phdr->p_memsz;
             EHHeaderParser<LocalAddressSpace>::decodeEHHdr(
                 *cbdata->addressSpace, eh_frame_hdr_start, phdr->p_memsz,
                 hdrInfo);
             cbdata->sects->dwarf_section = hdrInfo.eh_frame_ptr;
             found_hdr = true;
           }
         }
 
         if (found_obj && found_hdr) {
           // The DWARF section length recorded here is the size of the
           // matching PT_LOAD segment, not a measured .eh_frame size.
           cbdata->sects->dwarf_section_length = object_length;
           return true;
         } else {
           return false;
         }
       },
       &cb_data);
   return static_cast<bool>(found);
 #else
 #error "_LIBUNWIND_SUPPORT_DWARF_UNWIND requires _LIBUNWIND_SUPPORT_DWARF_INDEX on this platform."
 #endif
 #endif
 
   // Reached when the selected lookup failed or no mechanism is compiled in.
   return false;
 }
 
 
 /// Secondary FDE lookup for targetAddr.  On Apple platforms the query is
 /// delegated to checkKeyMgrRegisteredFDEs, which receives fde reinterpreted
 /// as a void* slot; on every other platform no dynamic FDE registry is
 /// consulted and the lookup always fails.
 inline bool LocalAddressSpace::findOtherFDE(pint_t targetAddr, pint_t &fde) {
 #ifndef __APPLE__
   // TO DO: if OS has way to dynamically register FDEs, check that.
   (void)fde;
   (void)targetAddr;
   return false;
 #else
   return checkKeyMgrRegisteredFDEs(targetAddr, *((void**)&fde));
 #endif
 }
 
 /// Resolves addr to the name of the symbol that dladdr() associates with it.
 ///
 /// On success the name is written to buf (bounded by bufLen via snprintf),
 /// *offset receives the distance from the symbol's address to addr, and
 /// true is returned.  Returns false when dladdr() fails, when it reports no
 /// symbol name, or when built with _LIBUNWIND_IS_BAREMETAL.
 inline bool LocalAddressSpace::findFunctionName(pint_t addr, char *buf,
                                                 size_t bufLen,
                                                 unw_word_t *offset) {
 #ifndef _LIBUNWIND_IS_BAREMETAL
   Dl_info symInfo;
   // Bail out early when there is no image or no named symbol for addr.
   if (!dladdr((void *)addr, &symInfo) || symInfo.dli_sname == NULL)
     return false;
 
   snprintf(buf, bufLen, "%s", symInfo.dli_sname);
   *offset = (addr - (pint_t) symInfo.dli_saddr);
   return true;
 #else
   return false;
 #endif
 }
 
 
 
 #ifdef UNW_REMOTE
 
 /// OtherAddressSpace is used as a template parameter to UnwindCursor when
 /// unwinding a thread in another process.  The other process can have a
 /// different endianness and a different pointer size, both of which are
 /// handled by the P template parameter.
 template <typename P>
 class OtherAddressSpace {
 public:
   // Stores the task handle in fTask; nothing else happens at construction.
   OtherAddressSpace(task_t task) : fTask(task) {}
 
   // Unsigned integer type that P defines for addresses of the other process.
   typedef typename P::uint_t pint_t;
 
   // Fixed-width reads at an address in the other process.  The definitions
   // go through localCopy(); the multi-byte ones pass the loaded value
   // through P::E (presumably the byte-order conversion).
   uint8_t   get8(pint_t addr);
   uint16_t  get16(pint_t addr);
   uint32_t  get32(pint_t addr);
   uint64_t  get64(pint_t addr);
   // Pointer-sized read, converted with P::getP.
   pint_t    getP(pint_t addr);
   // LEB128 decoders; addr is advanced past the bytes that were consumed.
   uint64_t  getULEB128(pint_t &addr, pint_t end);
   int64_t   getSLEB128(pint_t &addr, pint_t end);
   pint_t    getEncodedP(pint_t &addr, pint_t end, uint8_t encoding,
                         pint_t datarelBase = 0);
   // Same signatures as the LocalAddressSpace members of the same names.
   bool      findFunctionName(pint_t addr, char *buf, size_t bufLen,
                         unw_word_t *offset);
   bool      findUnwindSections(pint_t targetAddr, UnwindInfoSections &info);
   bool      findOtherFDE(pint_t targetAddr, pint_t &fde);
 private:
   // Meant to hand back a locally dereferenceable pointer for addr; the
   // definition is still an unimplemented stub (marked FIX ME).
   void *localCopy(pint_t addr);
 
   // Task handle supplied to the constructor.
   task_t fTask;
 };
 
 /// Reads the single byte that localCopy() exposes for addr.  No P::E
 /// conversion is applied to a one-byte value.
 template <typename P> uint8_t OtherAddressSpace<P>::get8(pint_t addr) {
   uint8_t *local = (uint8_t *)localCopy(addr);
   return *local;
 }
 
 /// Loads 16 bits through localCopy(addr) and returns them after passing
 /// them through P::E::get16.
 template <typename P> uint16_t OtherAddressSpace<P>::get16(pint_t addr) {
   uint16_t *local = (uint16_t *)localCopy(addr);
   return P::E::get16(*local);
 }
 
 /// Loads 32 bits through localCopy(addr) and returns them after passing
 /// them through P::E::get32.
 template <typename P> uint32_t OtherAddressSpace<P>::get32(pint_t addr) {
   uint32_t *local = (uint32_t *)localCopy(addr);
   return P::E::get32(*local);
 }
 
 /// Loads 64 bits through localCopy(addr) and returns them after passing
 /// them through P::E::get64.
 template <typename P> uint64_t OtherAddressSpace<P>::get64(pint_t addr) {
   uint64_t *local = (uint64_t *)localCopy(addr);
   return P::E::get64(*local);
 }
 
 /// Reads one pointer-sized value at addr in the other process and converts
 /// it with P::getP.
 ///
 /// The load is performed at the other process's pointer width (pint_t, i.e.
 /// P::uint_t).  Loading a fixed 64 bits instead would read past a 32-bit
 /// pointer and, on a big-endian host, hand the wrong half to P::getP.
 template <typename P>
 typename P::uint_t OtherAddressSpace<P>::getP(pint_t addr) {
   return P::getP(*(pint_t *)localCopy(addr));
 }
 
 /// Decodes an unsigned LEB128 value located at addr in the other process.
 ///
 /// The bytes are obtained through localCopy() and decoded with
 /// LocalAddressSpace::getULEB128, bounded by the same number of bytes that
 /// separate addr from end.  addr is then advanced by however many bytes the
 /// local decoder consumed.
 template <typename P>
 uint64_t OtherAddressSpace<P>::getULEB128(pint_t &addr, pint_t end) {
   const uintptr_t span = (end - addr);
   const LocalAddressSpace::pint_t start =
       (LocalAddressSpace::pint_t) localCopy(addr);
   LocalAddressSpace::pint_t cursor = start;
   const uint64_t value = LocalAddressSpace::getULEB128(cursor, start + span);
   // Mirror the local cursor movement onto the caller's address.
   addr += (cursor - start);
   return value;
 }
 
 /// Decodes a signed LEB128 value located at addr in the other process.
 ///
 /// The bytes are obtained through localCopy() and decoded with
 /// LocalAddressSpace::getSLEB128, bounded by the same number of bytes that
 /// separate addr from end.  addr is advanced by the number of bytes the
 /// local decoder consumed.
 template <typename P>
 int64_t OtherAddressSpace<P>::getSLEB128(pint_t &addr, pint_t end) {
   uintptr_t size = (end - addr);
   LocalAddressSpace::pint_t laddr = (LocalAddressSpace::pint_t) localCopy(addr);
   LocalAddressSpace::pint_t sladdr = laddr;
   // Keep the decoded value signed all the way to the return statement
   // instead of bouncing it through an unsigned temporary.
   int64_t result = LocalAddressSpace::getSLEB128(laddr, laddr + size);
   addr += (laddr - sladdr);
   return result;
 }
 
 /// Unimplemented stub.  Every getXX() accessor of this class dereferences
 /// the pointer returned from here.
 ///
 /// NOTE(review): the body has no return statement, so calling any
 /// instantiation of this function is undefined behaviour until it is
 /// implemented.
 template <typename P> void *OtherAddressSpace<P>::localCopy(pint_t addr) {
   // FIX ME
 }
 
 /// Symbol lookup in the other process is not implemented yet, so this
 /// always reports that no name was found.  buf and *offset are left
 /// untouched.
 ///
 /// Returning false explicitly replaces falling off the end of a non-void
 /// function, which is undefined behaviour.
 template <typename P>
 bool OtherAddressSpace<P>::findFunctionName(pint_t addr, char *buf,
                                             size_t bufLen, unw_word_t *offset) {
   // FIX ME: implement the lookup for the other process.
   (void)addr;
   (void)buf;
   (void)bufLen;
   (void)offset;
   return false;
 }
 
 /// unw_addr_space is the base class that the abstract unw_addr_space_t type
 /// in libunwind.h points to.
 struct unw_addr_space {
   // NOTE(review): presumably identifies which concrete subclass this object
   // is -- confirm against the code that creates address spaces.
   cpu_type_t cpuType;
   // Task handle for the process being examined.
   task_t taskPort;
 };
 
 /// unw_addr_space_i386 is the concrete instance that a unw_addr_space_t
 /// points to when examining a 32-bit intel process.
 struct unw_addr_space_i386 : public unw_addr_space {
   // Only oas is initialized here; the inherited cpuType and taskPort fields
   // are not set by this constructor.
   unw_addr_space_i386(task_t task) : oas(task) {}
   // 32-bit pointers, little-endian byte order.
   OtherAddressSpace<Pointer32<LittleEndian> > oas;
 };
 
 /// unw_addr_space_x86_64 is the concrete instance that a unw_addr_space_t
 /// points to when examining a 64-bit intel process.
 struct unw_addr_space_x86_64 : public unw_addr_space {
   // Only oas is initialized here; the inherited cpuType and taskPort fields
   // are not set by this constructor.
   unw_addr_space_x86_64(task_t task) : oas(task) {}
   // 64-bit pointers, little-endian byte order.
   OtherAddressSpace<Pointer64<LittleEndian> > oas;
 };
 
 /// unw_addr_space_ppc is the concrete instance that a unw_addr_space_t
 /// points to when examining a 32-bit PowerPC process.
 struct unw_addr_space_ppc : public unw_addr_space {
   // Only oas is initialized here; the inherited cpuType and taskPort fields
   // are not set by this constructor.
   unw_addr_space_ppc(task_t task) : oas(task) {}
   // 32-bit pointers, big-endian byte order.
   OtherAddressSpace<Pointer32<BigEndian> > oas;
 };
 
 #endif // UNW_REMOTE
 
 } // namespace libunwind
 
 #endif // __ADDRESSSPACE_HPP__
Index: projects/release-pkg/contrib/llvm/projects/libunwind
===================================================================
--- projects/release-pkg/contrib/llvm/projects/libunwind	(revision 293335)
+++ projects/release-pkg/contrib/llvm/projects/libunwind	(revision 293336)

Property changes on: projects/release-pkg/contrib/llvm/projects/libunwind
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/llvm/projects/libunwind:r293196-293335
Index: projects/release-pkg/contrib/llvm
===================================================================
--- projects/release-pkg/contrib/llvm	(revision 293335)
+++ projects/release-pkg/contrib/llvm	(revision 293336)

Property changes on: projects/release-pkg/contrib/llvm
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/contrib/llvm:r293196-293335
Index: projects/release-pkg/etc/mtree/BSD.tests.dist
===================================================================
--- projects/release-pkg/etc/mtree/BSD.tests.dist	(revision 293335)
+++ projects/release-pkg/etc/mtree/BSD.tests.dist	(revision 293336)
@@ -1,630 +1,632 @@
 # $FreeBSD$
 #
 # Please see the file src/etc/mtree/README before making changes to this file.
 #
 
 /set type=dir uname=root gname=wheel mode=0755
 .
     bin
         cat
         ..
         chown
         ..
         date
         ..
         dd
         ..
         expr
         ..
         ls
         ..
         mv
         ..
         pax
         ..
         pkill
         ..
         sh
             builtins
             ..
             errors
             ..
             execution
             ..
             expansion
             ..
             parameters
             ..
             parser
             ..
             set-e
             ..
         ..
         sleep
         ..
         test
         ..
     ..
     cddl
         lib
         ..
         sbin
         ..
         usr.bin
         ..
         usr.sbin
             dtrace
                 common
                     aggs
                     ..
                     arithmetic
                     ..
                     arrays
                     ..
                     assocs
                     ..
                     begin
                     ..
                     bitfields
                     ..
                     buffering
                     ..
                     builtinvar
                     ..
                     cg
                     ..
                     clauses
                     ..
                     cpc
                     ..
                     decls
                     ..
                     drops
                     ..
                     dtraceUtil
                     ..
                     end
                     ..
                     enum
                     ..
                     error
                     ..
                     exit
                     ..
                     fbtprovider
                     ..
                     funcs
                     ..
                     grammar
                     ..
                     include
                     ..
                     inline
                     ..
                     io
                     ..
                     ip
                     ..
                     java_api
                     ..
                     json
                     ..
                     lexer
                     ..
                     llquantize
                     ..
                     mdb
                     ..
                     mib
                     ..
                     misc
                     ..
                     multiaggs
                     ..
                     offsetof
                     ..
                     operators
                     ..
                     pid
                     ..
                     plockstat
                     ..
                     pointers
                     ..
                     pragma
                     ..
                     predicates
                     ..
                     preprocessor
                     ..
                     print
                     ..
                     printa
                     ..
                     printf
                     ..
                     privs
                     ..
                     probes
                     ..
                     proc
                     ..
                     profile-n
                     ..
                     providers
                     ..
                     raise
                     ..
                     rates
                     ..
                     safety
                     ..
                     scalars
                     ..
                     sched
                     ..
                     scripting
                     ..
                     sdt
                     ..
                     sizeof
                     ..
                     speculation
                     ..
                     stability
                     ..
                     stack
                     ..
                     stackdepth
                     ..
                     stop
                     ..
                     strlen
                     ..
                     strtoll
                     ..
                     struct
                     ..
                     syscall
                     ..
                     sysevent
                     ..
                     tick-n
                     ..
                     trace
                     ..
                     tracemem
                     ..
                     translators
                     ..
                     typedef
                     ..
                     types
                     ..
                     uctf
                     ..
                     union
                     ..
                     usdt
                     ..
                     ustack
                     ..
                     vars
                     ..
                     version
                     ..
                 ..
             ..
         ..
     ..
     etc
         rc.d
         ..
     ..
     games
     ..
     gnu
         lib
         ..
         usr.bin
             diff
             ..
         ..
     ..
     lib
         atf
             libatf-c
                 detail
                 ..
             ..
             libatf-c++
                 detail
                 ..
             ..
             test-programs
             ..
         ..
         libarchive
         ..
         libc
             c063
             ..
             db
             ..
             gen
                 execve
                 ..
                 posix_spawn
                 ..
             ..
             hash
                 data
                 ..
             ..
             inet
             ..
             locale
             ..
             net
                 getaddrinfo
                     data
                     ..
                 ..
             ..
             nss
             ..
             regex
                 data
                 ..
             ..
             resolv
             ..
             rpc
             ..
             ssp
             ..
             stdio
             ..
             stdlib
             ..
             string
             ..
             sys
             ..
             time
             ..
             tls
                 dso
                 ..
             ..
             termios
             ..
             ttyio
             ..
         ..
         libcrypt
         ..
         libmp
         ..
         libnv
         ..
         libpam
         ..
         libproc
         ..
         librt
         ..
         libthr
             dlopen
             ..
         ..
         libutil
         ..
         libxo
         ..
         msun
         ..
     ..
     libexec
         atf
             atf-check
             ..
             atf-sh
             ..
         ..
         rtld-elf
         ..
     ..
     sbin
         dhclient
         ..
         devd
         ..
         growfs
         ..
         ifconfig
         ..
         mdconfig
         ..
     ..
     secure
         lib
         ..
         libexec
         ..
         usr.bin
         ..
         usr.sbin
         ..
     ..
     share
         examples
             tests
                 atf
                 ..
                 plain
                 ..
             ..
         ..
     ..
     sys
         acl
         ..
         aio
         ..
         fifo
         ..
         file
         ..
         kern
             acct
             ..
             execve
             ..
             pipe
             ..
         ..
         kqueue
         ..
         mac
             bsdextended
             ..
             portacl
             ..
         ..
         mqueue
         ..
         netinet
         ..
         opencrypto
         ..
         pjdfstest
             chflags
             ..
             chmod
             ..
             chown
             ..
             ftruncate
             ..
             granular
             ..
             link
             ..
             mkdir
             ..
             mkfifo
             ..
             mknod
             ..
             open
             ..
             rename
             ..
             rmdir
             ..
             symlink
             ..
             truncate
             ..
             unlink
             ..
         ..
         posixshm
         ..
         vfs
         ..
         vm
         ..
     ..
     usr.bin
         apply
         ..
         basename
         ..
         bmake
             archives
                 fmt_44bsd
                 ..
                 fmt_44bsd_mod
                 ..
                 fmt_oldbsd
                 ..
             ..
             basic
                 t0
                 ..
                 t1
                 ..
                 t2
                 ..
                 t3
                 ..
             ..
             execution
                 ellipsis
                 ..
                 empty
                 ..
                 joberr
                 ..
                 plus
                 ..
             ..
             shell
                 builtin
                 ..
                 meta
                 ..
                 path
                 ..
                 path_select
                 ..
                 replace
                 ..
                 select
                 ..
             ..
             suffixes
                 basic
                 ..
                 src_wild1
                 ..
                 src_wild2
                 ..
             ..
             syntax
                 directive-t0
                 ..
                 enl
                 ..
                 funny-targets
                 ..
                 semi
                 ..
             ..
             sysmk
                 t0
                     2
                         1
                         ..
                     ..
                     mk
                     ..
                 ..
                 t1
                     2
                         1
                         ..
                     ..
                     mk
                     ..
                 ..
                 t2
                     2
                         1
                         ..
                     ..
                     mk
                     ..
                 ..
             ..
             variables
                 modifier_M
                 ..
                 modifier_t
                 ..
                 opt_V
                 ..
                 t0
                 ..
             ..
         ..
         calendar
         ..
         cmp
         ..
         cpio
         ..
         col
         ..
         comm
         ..
         cut
         ..
         dirname
         ..
         file2c
         ..
         grep
         ..
         gzip
         ..
         ident
         ..
         join
         ..
         jot
         ..
         lastcomm
         ..
         limits
         ..
         m4
         ..
         mkimg
         ..
         ncal
         ..
         opensm
         ..
         printf
         ..
         sed
             regress.multitest.out
             ..
         ..
         soelim
         ..
         tar
         ..
         timeout
         ..
         tr
         ..
         truncate
         ..
         units
         ..
         uudecode
         ..
         uuencode
         ..
         xargs
         ..
         xo
         ..
         yacc
             yacc
             ..
         ..
     ..
     usr.sbin
         etcupdate
         ..
         fstyp
         ..
         makefs
         ..
         newsyslog
         ..
         nmtree
         ..
         pw
         ..
+        rpcbind
+        ..
         sa
         ..
     ..
 ..
 
 # vim: set expandtab ts=4 sw=4:
Index: projects/release-pkg/etc/rc
===================================================================
--- projects/release-pkg/etc/rc	(revision 293335)
+++ projects/release-pkg/etc/rc	(revision 293336)
@@ -1,146 +1,152 @@
 #!/bin/sh
 #
 # Copyright (c) 2000-2004  The FreeBSD Project
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 #
 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 #	@(#)rc	5.27 (Berkeley) 6/5/91
 # $FreeBSD$
 #
 
 # System startup script run by init on autoboot
 # or after single-user.
 # Output and error are redirected to console by init,
 # and the console is the controlling terminal.
 
 # Note that almost all of the user-configurable behavior is no longer in
 # this file, but rather in /etc/defaults/rc.conf.  Please check that file
 # first before contemplating any changes here.  If you do need to change
 # this file for some reason, we would like to know about it.
 
 stty status '^T' 2> /dev/null
 
 # Set shell to ignore SIGINT (2), but not children;
 # shell catches SIGQUIT (3) and returns to single user.
 #
 trap : 2
 trap "echo 'Boot interrupted'; exit 1" 3
 
 HOME=/
 PATH=/sbin:/bin:/usr/sbin:/usr/bin
 export HOME PATH
 
 if [ "$1" = autoboot ]; then
 	autoboot=yes
 	_boot="faststart"
 	rc_fast=yes        # run_rc_command(): do fast booting
 else
 	autoboot=no
 	_boot="quietstart"
 fi
 
 dlv=`/sbin/sysctl -n vfs.nfs.diskless_valid 2> /dev/null`
 if [ ${dlv:=0} -ne 0 -o -f /etc/diskless ]; then
 	sh /etc/rc.initdiskless
 fi
 
 # Run these after determining whether we are booting diskless in order
 # to minimize the number of files that are needed on a diskless system,
 # and to make the configuration file variables available to rc itself.
 #
 . /etc/rc.subr
 load_rc_config
 
 # If we receive a SIGALRM, re-source /etc/rc.conf; this allows rc.d
 # scripts to perform "boot-time configuration" including enabling and
 # disabling rc.d scripts which appear later in the boot order.
 trap "_rc_conf_loaded=false; load_rc_config" ALRM
 
 skip="-s nostart"
 if [ `/sbin/sysctl -n security.jail.jailed` -eq 1 ]; then
 	skip="$skip -s nojail"
 	if [ `/sbin/sysctl -n security.jail.vnet` -ne 1 ]; then
 		skip="$skip -s nojailvnet"
 	fi
 fi
 
 # If the firstboot sentinel doesn't exist, we want to skip firstboot scripts.
 if ! [ -e ${firstboot_sentinel} ]; then
 	skip_firstboot="-s firstboot"
 fi
 
 # Do a first pass to get everything up to $early_late_divider so that
 # we can do a second pass that includes $local_startup directories
 #
 files=`rcorder ${skip} ${skip_firstboot} /etc/rc.d/* 2>/dev/null`
 
 _rc_elem_done=' '
 for _rc_elem in ${files}; do
 	run_rc_script ${_rc_elem} ${_boot}
 	_rc_elem_done="${_rc_elem_done}${_rc_elem} "
 
 	case "$_rc_elem" in
 	*/${early_late_divider})	break ;;
 	esac
 done
 
 unset files local_rc
 
 # Now that disks are mounted, for each dir in $local_startup
 # search for init scripts that use the new rc.d semantics.
 #
 case ${local_startup} in
 [Nn][Oo] | '') ;;
 *)	find_local_scripts_new ;;
 esac
 
 # The firstboot sentinel might be on a newly mounted filesystem; look for it
 # again and unset skip_firstboot if we find it.
 if [ -e ${firstboot_sentinel} ]; then
 	skip_firstboot=""
 fi
 
 files=`rcorder ${skip} ${skip_firstboot} /etc/rc.d/* ${local_rc} 2>/dev/null`
 for _rc_elem in ${files}; do
 	case "$_rc_elem_done" in
 	*" $_rc_elem "*)	continue ;;
 	esac
 
 	run_rc_script ${_rc_elem} ${_boot}
 done
 
 # Remove the firstboot sentinel, and reboot if it was requested.
+# Be a bit paranoid about removing it to handle the common failure
+# modes since the consequence of failure can be big.
+# Note: this assumes firstboot_sentinel is on / when we have
+# a read-only /, or that it is on media that's writable.
 if [ -e ${firstboot_sentinel} ]; then
 	[ ${root_rw_mount} = "yes" ] || mount -uw /
-	/bin/rm ${firstboot_sentinel}
+	chflags -R 0 ${firstboot_sentinel}
+	rm -rf ${firstboot_sentinel}
 	if [ -e ${firstboot_sentinel}-reboot ]; then
-		/bin/rm ${firstboot_sentinel}-reboot
+		chflags -R 0 ${firstboot_sentinel}-reboot
+		rm -rf ${firstboot_sentinel}-reboot
 		[ ${root_rw_mount} = "yes" ] || mount -ur /
 		kill -INT 1
 	fi
 	[ ${root_rw_mount} = "yes" ] || mount -ur /
 fi
 
 echo ''
 date
 exit 0
Index: projects/release-pkg/gnu/usr.bin/binutils/ld/Makefile
===================================================================
--- projects/release-pkg/gnu/usr.bin/binutils/ld/Makefile	(revision 293335)
+++ projects/release-pkg/gnu/usr.bin/binutils/ld/Makefile	(revision 293336)
@@ -1,76 +1,77 @@
 # $FreeBSD$
 
 ELF_SCR_EXT=	x xbn xc xd xdc xdw xn xr xs xsc xsw xu xw
 .include "../Makefile.inc0"
 .include <src.opts.mk>
 
 .PATH: ${SRCDIR}/ld
 
-PROG=	ld
+PROG=	ld.bfd
+MAN=	ld.1
 SCRIPTDIR= /usr/libdata/ldscripts
 SRCS+=	ldcref.c \
 	ldctor.c \
 	ldemul-list.h \
 	ldemul.c \
 	ldexp.c \
 	ldfile.c \
 	ldgram.y \
 	ldlang.c \
 	ldlex.l \
 	ldmain.c \
 	ldmisc.c \
 	ldver.c \
 	ldwrite.c \
 	lexsup.c \
 	mri.c
 
 CFLAGS+= -DTARGET=\"${TARGET_TUPLE}\"
 CFLAGS+= -DDEFAULT_EMULATION=\"${NATIVE_EMULATION}\"
 CFLAGS+= -DSCRIPTDIR=\"${TOOLS_PREFIX}/usr/libdata\"
 CFLAGS+= -DBFD_VERSION_STRING=\"${VERSION}\"
 CFLAGS+= -DBINDIR=\"${BINDIR}\"
 .if defined(TOOLS_PREFIX)
 CFLAGS+= -DTARGET_SYSTEM_ROOT=\"${TOOLS_PREFIX}\"
 .else
 CFLAGS+= -DTARGET_SYSTEM_ROOT=\"/\"
 .endif
 CFLAGS+= -DTOOLBINDIR=\"${TOOLS_PREFIX}/${BINDIR}/libexec\"
 CFLAGS+= -D_GNU_SOURCE
 CFLAGS+= -I${SRCDIR}/ld -I${SRCDIR}/bfd
 .if ${MK_SHARED_TOOLCHAIN} == "no"
 NO_SHARED?= yes
 .endif
 DPADD=	${RELTOP}/libbfd/libbfd.a
 DPADD+=	${RELTOP}/libiberty/libiberty.a
 LDADD=	${DPADD}
 CLEANDIRS+=	ldscripts
 CLEANFILES+=	ldemul-list.h stringify.sed
 
 FILES=		${LDSCRIPTS:S|^|ldscripts/|}
 FILESDIR=	${SCRIPTDIR}
-LINKS=		${BINDIR}/ld ${BINDIR}/ld.bfd
+LINKS=		${BINDIR}/ld.bfd ${BINDIR}/ld
 
 HOST=		${TARGET_TUPLE}
 LIBSEARCHPATH=	\"=/lib\":\"=/usr/lib\"
 .for ext in ${ELF_SCR_EXT}
 LDSCRIPTS+=	${NATIVE_EMULATION}.${ext}
 ldscripts/${NATIVE_EMULATION}.${ext}: e${NATIVE_EMULATION}.c
 .endfor
 
 EMXFR=
 EMLST=
 .for _e in ${NATIVE_EMULATION} ${EMS}
 EMXFR+=	extern ld_emulation_xfer_type ld_${_e}_emulation;
 EMLST+=	&ld_${_e}_emulation,
 .endfor
 
 ldemul-list.h:
 	echo "${EMXFR}" > ${.TARGET}
 	echo "#define EMULATION_LIST ${EMLST} 0" >> ${.TARGET}
 
 stringify.sed: ${SRCDIR}/ld/emultempl/astring.sed .NOMETA
 	ln -sf ${.ALLSRC} ${.TARGET}
 
 GENDIRDEPS_FILTER.host+= Nusr.bin/yacc
 
 .include <bsd.prog.mk>
Index: projects/release-pkg/gnu/usr.bin/binutils
===================================================================
--- projects/release-pkg/gnu/usr.bin/binutils	(revision 293335)
+++ projects/release-pkg/gnu/usr.bin/binutils	(revision 293336)

Property changes on: projects/release-pkg/gnu/usr.bin/binutils
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/gnu/usr.bin/binutils:r289091-289384,293171-293335
Index: projects/release-pkg/lib/libstand/Makefile
===================================================================
--- projects/release-pkg/lib/libstand/Makefile	(revision 293335)
+++ projects/release-pkg/lib/libstand/Makefile	(revision 293336)
@@ -1,153 +1,154 @@
 # $FreeBSD$
 # Originally from	$NetBSD: Makefile,v 1.21 1997/10/26 22:08:38 lukem Exp $
 #
 # Notes:
 # - We don't use the libc strerror/sys_errlist because the string table is
 #   quite large.
 #
 
 MK_PROFILE=	no
 MK_SSP=		no
 
 .include <src.opts.mk>
 
 LIBSTAND_SRC?=	${.CURDIR}
 LIBSTAND_CPUARCH?=${MACHINE_CPUARCH}
 LIBC_SRC=	${LIBSTAND_SRC}/../libc
 
 LIB=		stand
 NO_PIC=
 INCS=		stand.h
 MAN?=		libstand.3
 
 WARNS?=		0
 
 CFLAGS+= -I${LIBSTAND_SRC}
 
 # standalone components and stuff we have modified locally
 SRCS+=	gzguts.h zutil.h __main.c assert.c bcd.c bswap.c environment.c getopt.c gets.c \
 	globals.c pager.c printf.c strdup.c strerror.c strtol.c strtoul.c random.c \
 	sbrk.c twiddle.c zalloc.c zalloc_malloc.c
 
 # private (pruned) versions of libc string functions
 SRCS+=	strcasecmp.c
 
 .PATH: ${LIBC_SRC}/net
 
 SRCS+= ntoh.c
 
 # string functions from libc
 .PATH: ${LIBC_SRC}/string
-SRCS+=	bcmp.c bcopy.c bzero.c ffs.c memccpy.c memchr.c memcmp.c memcpy.c \
-	memmove.c memset.c qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \
+SRCS+=	bcmp.c bcopy.c bzero.c ffs.c fls.c \
+	memccpy.c memchr.c memcmp.c memcpy.c memmove.c memset.c \
+	qdivrem.c strcat.c strchr.c strcmp.c strcpy.c \
 	strcspn.c strlcat.c strlcpy.c strlen.c strncat.c strncmp.c strncpy.c \
 	strpbrk.c strrchr.c strsep.c strspn.c strstr.c strtok.c swab.c
 .if ${MACHINE_CPUARCH} == "arm"
 .PATH: ${LIBC_SRC}/arm/gen
 
 # Do not generate movt/movw, because the relocation fixup for them does not
 # translate to the -Bsymbolic -pie format required by self_reloc() in loader(8).
 # Also, the fpu is not available in a standalone environment.
 CFLAGS.clang+=	-mllvm -arm-use-movt=0
 CFLAGS.clang+=	-mfpu=none
 
 # Compiler support functions
 .PATH: ${LIBSTAND_SRC}/../../contrib/compiler-rt/lib/builtins/
 # __clzsi2 and __ctzsi2 for various builtin functions
 SRCS+=	clzsi2.c ctzsi2.c
 # Divide and modulus functions called by the compiler
 SRCS+=	 divmoddi4.c  divmodsi4.c  divdi3.c  divsi3.c  moddi3.c  modsi3.c
 SRCS+=	udivmoddi4.c udivmodsi4.c udivdi3.c udivsi3.c umoddi3.c umodsi3.c
 
 .PATH: ${LIBSTAND_SRC}/../../contrib/compiler-rt/lib/builtins/arm/
 SRCS+=	aeabi_idivmod.S aeabi_ldivmod.S aeabi_uidivmod.S aeabi_uldivmod.S
 SRCS+=	aeabi_memcmp.S aeabi_memcpy.S aeabi_memmove.S aeabi_memset.S
 .endif
 
 .if ${MACHINE_CPUARCH} == "aarch64"
 .PATH: ${LIBC_SRC}/aarch64/gen
 .endif
 
 .if ${MACHINE_CPUARCH} == "powerpc"
 .PATH: ${LIBC_SRC}/quad
 SRCS+=	ashldi3.c ashrdi3.c
 SRCS+=	syncicache.c
 .endif
 
 # uuid functions from libc
 .PATH: ${LIBC_SRC}/uuid
 SRCS+= uuid_create_nil.c uuid_equal.c uuid_from_string.c uuid_is_nil.c uuid_to_string.c
 
 # _setjmp/_longjmp
 .PATH: ${LIBSTAND_SRC}/${LIBSTAND_CPUARCH}
 SRCS+=	_setjmp.S
 
 # decompression functionality from libbz2
 # NOTE: to actually test this functionality after libbz2 upgrade compile
 # loader(8) with LOADER_BZIP2_SUPPORT defined
 .PATH: ${LIBSTAND_SRC}/../../contrib/bzip2
 CFLAGS+= -DBZ_NO_STDIO -DBZ_NO_COMPRESS
 SRCS+=	libstand_bzlib_private.h
 
 .for file in bzlib.c crctable.c decompress.c huffman.c randtable.c
 SRCS+=	_${file}
 CLEANFILES+=	_${file}
 
 _${file}: ${file}
 	sed "s|bzlib_private\.h|libstand_bzlib_private.h|" \
 	    ${.ALLSRC} > ${.TARGET}
 .endfor
 
 CLEANFILES+= libstand_bzlib_private.h
 libstand_bzlib_private.h: bzlib_private.h
 	sed -e 's|<stdlib.h>|"stand.h"|' \
 		${.ALLSRC} > ${.TARGET}
 
 # decompression functionality from libz
 .PATH: ${LIBSTAND_SRC}/../libz
 CFLAGS+=-DHAVE_MEMCPY -I${LIBSTAND_SRC}/../libz
 SRCS+=	adler32.c crc32.c libstand_zutil.h libstand_gzguts.h
 
 .for file in infback.c inffast.c inflate.c inftrees.c zutil.c
 SRCS+=	_${file}
 CLEANFILES+=	_${file}
 
 _${file}: ${file}
 	sed -e "s|zutil\.h|libstand_zutil.h|" \
 	    -e "s|gzguts\.h|libstand_gzguts.h|" \
 	    ${.ALLSRC} > ${.TARGET}
 .endfor
 
 # depend on stand.h being able to be included multiple times
 .for file in zutil.h gzguts.h
 CLEANFILES+= libstand_${file}
 libstand_${file}: ${file}
 	sed -e 's|<fcntl.h>|"stand.h"|' \
 	    -e 's|<stddef.h>|"stand.h"|' \
 	    -e 's|<string.h>|"stand.h"|' \
 	    -e 's|<stdio.h>|"stand.h"|' \
 	    -e 's|<stdlib.h>|"stand.h"|' \
 	    ${.ALLSRC} > ${.TARGET}
 .endfor
 
 # io routines
 SRCS+=	closeall.c dev.c ioctl.c nullfs.c stat.c \
 	fstat.c close.c lseek.c open.c read.c write.c readdir.c
 
 # network routines
 SRCS+=	arp.c ether.c inet_ntoa.c in_cksum.c net.c udp.c netif.c rpc.c
 
 # network info services:
 SRCS+=	bootp.c rarp.c bootparam.c
 
 # boot filesystems
 SRCS+=	ufs.c nfs.c cd9660.c tftp.c gzipfs.c bzipfs.c
 SRCS+=	dosfs.c ext2fs.c
 SRCS+=	splitfs.c
 SRCS+=	pkgfs.c
 .if ${MK_NAND} != "no"
 SRCS+=	nandfs.c
 .endif
 
 .include <bsd.stand.mk>
 .include <bsd.lib.mk>
Index: projects/release-pkg/lib/libsysdecode/Makefile.depend
===================================================================
--- projects/release-pkg/lib/libsysdecode/Makefile.depend	(nonexistent)
+++ projects/release-pkg/lib/libsysdecode/Makefile.depend	(revision 293336)
@@ -0,0 +1,22 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+	gnu/lib/csu \
+	gnu/lib/libgcc \
+	include \
+	include/rpc \
+	include/xlocale \
+	lib/${CSU_DIR} \
+	lib/libc \
+	lib/libcompiler_rt \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+ioctl.So: ioctl.c
+ioctl.o: ioctl.c
+ioctl.po: ioctl.c
+.endif

Property changes on: projects/release-pkg/lib/libsysdecode/Makefile.depend
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/release-pkg/libexec/rtld-elf/rtld.c
===================================================================
--- projects/release-pkg/libexec/rtld-elf/rtld.c	(revision 293335)
+++ projects/release-pkg/libexec/rtld-elf/rtld.c	(revision 293336)
@@ -1,5119 +1,5119 @@
 /*-
  * Copyright 1996, 1997, 1998, 1999, 2000 John D. Polstra.
  * Copyright 2003 Alexander Kabaev <kan@FreeBSD.ORG>.
  * Copyright 2009-2012 Konstantin Belousov <kib@FreeBSD.ORG>.
  * Copyright 2012 John Marino <draco@marino.st>.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * Dynamic linker for ELF.
  *
  * John Polstra <jdp@polstra.com>.
  */
 
 #include <sys/param.h>
 #include <sys/mount.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 #include <sys/utsname.h>
 #include <sys/ktrace.h>
 
 #include <dlfcn.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
 #include "debug.h"
 #include "rtld.h"
 #include "libmap.h"
 #include "paths.h"
 #include "rtld_tls.h"
 #include "rtld_printf.h"
 #include "notes.h"
 
 /* Types. */
 typedef void (*func_ptr_type)();
 typedef void * (*path_enum_proc) (const char *path, size_t len, void *arg);
 
 /*
  * Function declarations.
  */
 static const char *basename(const char *);
 static void digest_dynamic1(Obj_Entry *, int, const Elf_Dyn **,
     const Elf_Dyn **, const Elf_Dyn **);
 static void digest_dynamic2(Obj_Entry *, const Elf_Dyn *, const Elf_Dyn *,
     const Elf_Dyn *);
 static void digest_dynamic(Obj_Entry *, int);
 static Obj_Entry *digest_phdr(const Elf_Phdr *, int, caddr_t, const char *);
 static Obj_Entry *dlcheck(void *);
 static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj,
     int lo_flags, int mode, RtldLockState *lockstate);
 static Obj_Entry *do_load_object(int, const char *, char *, struct stat *, int);
 static int do_search_info(const Obj_Entry *obj, int, struct dl_serinfo *);
 static bool donelist_check(DoneList *, const Obj_Entry *);
 static void errmsg_restore(char *);
 static char *errmsg_save(void);
 static void *fill_search_info(const char *, size_t, void *);
 static char *find_library(const char *, const Obj_Entry *, int *);
 static const char *gethints(bool);
 static void init_dag(Obj_Entry *);
 static void init_pagesizes(Elf_Auxinfo **aux_info);
 static void init_rtld(caddr_t, Elf_Auxinfo **);
 static void initlist_add_neededs(Needed_Entry *, Objlist *);
 static void initlist_add_objects(Obj_Entry *, Obj_Entry **, Objlist *);
 static void linkmap_add(Obj_Entry *);
 static void linkmap_delete(Obj_Entry *);
 static void load_filtees(Obj_Entry *, int flags, RtldLockState *);
 static void unload_filtees(Obj_Entry *);
 static int load_needed_objects(Obj_Entry *, int);
 static int load_preload_objects(void);
 static Obj_Entry *load_object(const char *, int fd, const Obj_Entry *, int);
 static void map_stacks_exec(RtldLockState *);
 static Obj_Entry *obj_from_addr(const void *);
 static void objlist_call_fini(Objlist *, Obj_Entry *, RtldLockState *);
 static void objlist_call_init(Objlist *, RtldLockState *);
 static void objlist_clear(Objlist *);
 static Objlist_Entry *objlist_find(Objlist *, const Obj_Entry *);
 static void objlist_init(Objlist *);
 static void objlist_push_head(Objlist *, Obj_Entry *);
 static void objlist_push_tail(Objlist *, Obj_Entry *);
 static void objlist_put_after(Objlist *, Obj_Entry *, Obj_Entry *);
 static void objlist_remove(Objlist *, Obj_Entry *);
 static int parse_libdir(const char *);
 static void *path_enumerate(const char *, path_enum_proc, void *);
 static int relocate_object_dag(Obj_Entry *root, bool bind_now,
     Obj_Entry *rtldobj, int flags, RtldLockState *lockstate);
 static int relocate_object(Obj_Entry *obj, bool bind_now, Obj_Entry *rtldobj,
     int flags, RtldLockState *lockstate);
 static int relocate_objects(Obj_Entry *, bool, Obj_Entry *, int,
     RtldLockState *);
 static int resolve_objects_ifunc(Obj_Entry *first, bool bind_now,
     int flags, RtldLockState *lockstate);
 static int rtld_dirname(const char *, char *);
 static int rtld_dirname_abs(const char *, char *);
 static void *rtld_dlopen(const char *name, int fd, int mode);
 static void rtld_exit(void);
 static char *search_library_path(const char *, const char *);
 static char *search_library_pathfds(const char *, const char *, int *);
 static const void **get_program_var_addr(const char *, RtldLockState *);
 static void set_program_var(const char *, const void *);
 static int symlook_default(SymLook *, const Obj_Entry *refobj);
 static int symlook_global(SymLook *, DoneList *);
 static void symlook_init_from_req(SymLook *, const SymLook *);
 static int symlook_list(SymLook *, const Objlist *, DoneList *);
 static int symlook_needed(SymLook *, const Needed_Entry *, DoneList *);
 static int symlook_obj1_sysv(SymLook *, const Obj_Entry *);
 static int symlook_obj1_gnu(SymLook *, const Obj_Entry *);
 static void trace_loaded_objects(Obj_Entry *);
 static void unlink_object(Obj_Entry *);
 static void unload_object(Obj_Entry *);
 static void unref_dag(Obj_Entry *);
 static void ref_dag(Obj_Entry *);
 static char *origin_subst_one(Obj_Entry *, char *, const char *,
     const char *, bool);
 static char *origin_subst(Obj_Entry *, char *);
 static bool obj_resolve_origin(Obj_Entry *obj);
 static void preinit_main(void);
 static int  rtld_verify_versions(const Objlist *);
 static int  rtld_verify_object_versions(Obj_Entry *);
 static void object_add_name(Obj_Entry *, const char *);
 static int  object_match_name(const Obj_Entry *, const char *);
 static void ld_utrace_log(int, void *, void *, size_t, int, const char *);
 static void rtld_fill_dl_phdr_info(const Obj_Entry *obj,
     struct dl_phdr_info *phdr_info);
 static uint32_t gnu_hash(const char *);
 static bool matched_symbol(SymLook *, const Obj_Entry *, Sym_Match_Result *,
     const unsigned long);
 
 void r_debug_state(struct r_debug *, struct link_map *) __noinline __exported;
 void _r_debug_postinit(struct link_map *) __noinline __exported;
 
 int __sys_openat(int, const char *, int, ...);
 
 /*
  * Data declarations.
  */
 static char *error_message;	/* Message for dlerror(), or NULL */
 struct r_debug r_debug __exported;	/* for GDB; */
 static bool libmap_disable;	/* Disable libmap */
 static bool ld_loadfltr;	/* Immediate filters processing */
 static char *libmap_override;	/* Maps to use in addition to libmap.conf */
 static bool trust;		/* False for setuid and setgid programs */
 static bool dangerous_ld_env;	/* True if environment variables have been
 				   used to affect the libraries loaded */
 static char *ld_bind_now;	/* Environment variable for immediate binding */
 static char *ld_debug;		/* Environment variable for debugging */
 static char *ld_library_path;	/* Environment variable for search path */
 static char *ld_library_dirs;	/* Environment variable for library descriptors */
 static char *ld_preload;	/* Environment variable for libraries to
 				   load first */
 static char *ld_elf_hints_path;	/* Environment variable for alternative hints path */
 static char *ld_tracing;	/* Called from ldd to print libs */
 static char *ld_utrace;		/* Use utrace() to log events. */
 static Obj_Entry *obj_list;	/* Head of linked list of shared objects */
 static Obj_Entry **obj_tail;	/* Link field of last object in list */
 static Obj_Entry *obj_main;	/* The main program shared object */
 static Obj_Entry obj_rtld;	/* The dynamic linker shared object */
 static unsigned int obj_count;	/* Number of objects in obj_list */
 static unsigned int obj_loads;	/* Number of objects loaded so far;
 				   NOTE(review): presumably a running
 				   total, unlike obj_count -- confirm */
 
 static Objlist list_global =	/* Objects dlopened with RTLD_GLOBAL */
   STAILQ_HEAD_INITIALIZER(list_global);
 static Objlist list_main =	/* Objects loaded at program startup */
   STAILQ_HEAD_INITIALIZER(list_main);
 static Objlist list_fini =	/* Objects needing fini() calls */
   STAILQ_HEAD_INITIALIZER(list_fini);
 
 Elf_Sym sym_zero;		/* For resolving undefined weak refs. */
 
 /*
  * Store the new state s in r_debug.r_state and then call r_debug_state()
  * with r_debug and the link map m (r_debug is marked "for GDB" above).
  * The body is wrapped in do/while(0) and the arguments are parenthesized
  * so that both statements stay together when the macro is the body of an
  * unbraced if/else.  Callers terminate the invocation with a semicolon.
  */
 #define GDB_STATE(s,m)	do {					\
 	r_debug.r_state = (s);					\
 	r_debug_state(&r_debug, (m));				\
 } while (0)
 
 extern Elf_Dyn _DYNAMIC;
 #pragma weak _DYNAMIC
 #ifndef RTLD_IS_DYNAMIC
 #define	RTLD_IS_DYNAMIC()	(&_DYNAMIC != NULL)
 #endif
 
 int dlclose(void *) __exported;
 char *dlerror(void) __exported;
 void *dlopen(const char *, int) __exported;
 void *fdlopen(int, int) __exported;
 void *dlsym(void *, const char *) __exported;
 dlfunc_t dlfunc(void *, const char *) __exported;
 void *dlvsym(void *, const char *, const char *) __exported;
 int dladdr(const void *, Dl_info *) __exported;
 void dllockinit(void *, void *(*)(void *), void (*)(void *), void (*)(void *),
     void (*)(void *), void (*)(void *), void (*)(void *)) __exported;
 int dlinfo(void *, int , void *) __exported;
 int dl_iterate_phdr(__dl_iterate_hdr_callback, void *) __exported;
 int _rtld_addr_phdr(const void *, struct dl_phdr_info *) __exported;
 int _rtld_get_stack_prot(void) __exported;
 int _rtld_is_dlopened(void *) __exported;
 void _rtld_error(const char *, ...) __exported;
 
 int npagesizes, osreldate;
 size_t *pagesizes;
 
 long __stack_chk_guard[8] = {0, 0, 0, 0, 0, 0, 0, 0};
 
 static int stack_prot = PROT_READ | PROT_WRITE | RTLD_DEFAULT_STACK_EXEC;
 static int max_stack_flags;
 
 /*
  * Global declarations normally provided by crt1.  The dynamic linker is
  * not built with crt1, so we have to provide them ourselves.
  */
 char *__progname;
 char **environ;
 
 /*
  * Used to pass argc, argv to init functions.
  */
 int main_argc;
 char **main_argv;
 
 /*
  * Globals to control TLS allocation.
  */
 size_t tls_last_offset;		/* Static TLS offset of last module */
 size_t tls_last_size;		/* Static TLS size of last module */
 size_t tls_static_space;	/* Static TLS space allocated */
 size_t tls_static_max_align;
 int tls_dtv_generation = 1;	/* Used to detect when dtv size changes  */
 int tls_max_index = 1;		/* Largest module index allocated */
 
 bool ld_library_path_rpath = false;
 
 /*
  * Globals for path names, and such
  */
 char *ld_elf_hints_default = _PATH_ELF_HINTS;
 char *ld_path_libmap_conf = _PATH_LIBMAP_CONF;
 char *ld_path_rtld = _PATH_RTLD;
 char *ld_standard_library_path = STANDARD_LIBRARY_PATH;
 char *ld_env_prefix = LD_;
 
 /*
  * Fill in a DoneList with an allocation large enough to hold all of
  * the currently-loaded objects.  Keep this as a macro since it calls
  * alloca and we want that to occur within the scope of the caller.
  */
 #define donelist_init(dlp)					\
     ((dlp)->objs = alloca(obj_count * sizeof (dlp)->objs[0]),	\
     assert((dlp)->objs != NULL),				\
     (dlp)->num_alloc = obj_count,				\
     (dlp)->num_used = 0)
 
 #define	UTRACE_DLOPEN_START		1
 #define	UTRACE_DLOPEN_STOP		2
 #define	UTRACE_DLCLOSE_START		3
 #define	UTRACE_DLCLOSE_STOP		4
 #define	UTRACE_LOAD_OBJECT		5
 #define	UTRACE_UNLOAD_OBJECT		6
 #define	UTRACE_ADD_RUNDEP		7
 #define	UTRACE_PRELOAD_FINISHED		8
 #define	UTRACE_INIT_CALL		9
 #define	UTRACE_FINI_CALL		10
 #define	UTRACE_DLSYM_START		11
 #define	UTRACE_DLSYM_STOP		12
 
 struct utrace_rtld {
 	char sig[4];			/* 'RTLD' */
 	int event;
 	void *handle;
 	void *mapbase;			/* Used for 'parent' and 'init/fini' */
 	size_t mapsize;
 	int refcnt;			/* Used for 'mode' */
 	char name[MAXPATHLEN];
 };
 
 #define	LD_UTRACE(e, h, mb, ms, r, n) do {			\
 	if (ld_utrace != NULL)					\
 		ld_utrace_log(e, h, mb, ms, r, n);		\
 } while (0)
 
 static void
 ld_utrace_log(int event, void *handle, void *mapbase, size_t mapsize,
     int refcnt, const char *name)
 {
 	struct utrace_rtld ut;
 
 	ut.sig[0] = 'R';
 	ut.sig[1] = 'T';
 	ut.sig[2] = 'L';
 	ut.sig[3] = 'D';
 	ut.event = event;
 	ut.handle = handle;
 	ut.mapbase = mapbase;
 	ut.mapsize = mapsize;
 	ut.refcnt = refcnt;
 	bzero(ut.name, sizeof(ut.name));
 	if (name)
 		strlcpy(ut.name, name, sizeof(ut.name));
 	utrace(&ut, sizeof(ut));
 }
 
 #ifdef RTLD_VARIANT_ENV_NAMES
 /*
  * Construct the name of an rtld environment variable for the type of
  * binary that's running: the current ld_env_prefix followed by var.
  *
  * The result lives in one static 128-byte buffer that every call
  * overwrites (strlcpy/strlcat truncate anything longer), so the returned
  * string must be consumed before _LD() is called again.
  */
 static inline const char *
 _LD(const char *var)
 {
 	static char buffer[128];

 	strlcpy(buffer, ld_env_prefix, sizeof(buffer));
 	strlcat(buffer, var, sizeof(buffer));
 	return (buffer);
 }
 #else
 /*
  * No variant names: place LD_ directly in front of x.
  * NOTE(review): presumably LD_ expands to a string literal so the two
  * concatenate at compile time -- confirm where LD_ is defined.
  */
 #define _LD(x)	LD_ x
 #endif
 
 /*
  * Main entry point for dynamic linking.  The first argument is the
  * stack pointer.  The stack is expected to be laid out as described
  * in the SVR4 ABI specification, Intel 386 Processor Supplement.
  * Specifically, the stack pointer points to a word containing
  * ARGC.  Following that in the stack is a null-terminated sequence
  * of pointers to argument strings.  Then comes a null-terminated
  * sequence of pointers to environment strings.  Finally, there is a
  * sequence of "auxiliary vector" entries.
  *
  * The second argument points to a place to store the dynamic linker's
  * exit procedure pointer and the third to a place to store the main
  * program's object.
  *
  * The return value is the main program's entry point.
  */
 func_ptr_type
 _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp)
 {
     Elf_Auxinfo *aux_info[AT_COUNT];
     int i;
     int argc;
     char **argv;
     char **env;
     Elf_Auxinfo *aux;
     Elf_Auxinfo *auxp;
     const char *argv0;
     Objlist_Entry *entry;
     Obj_Entry *obj;
     Obj_Entry **preload_tail;
     Obj_Entry *last_interposer;
     Objlist initlist;
     RtldLockState lockstate;
     char *library_path_rpath;
     int mib[2];
     size_t len;
 
     /*
      * On entry, the dynamic linker itself has not been relocated yet.
      * Be very careful not to reference any global data until after
      * init_rtld has returned.  It is OK to reference file-scope statics
      * and string constants, and to call static and global functions.
      */
 
     /* Find the auxiliary vector on the stack. */
     argc = *sp++;
     argv = (char **) sp;
     sp += argc + 1;	/* Skip over arguments and NULL terminator */
     env = (char **) sp;
     while (*sp++ != 0)	/* Skip over environment, and NULL terminator */
 	;
     aux = (Elf_Auxinfo *) sp;
 
     /* Digest the auxiliary vector. */
     for (i = 0;  i < AT_COUNT;  i++)
 	aux_info[i] = NULL;
     for (auxp = aux;  auxp->a_type != AT_NULL;  auxp++) {
 	if (auxp->a_type < AT_COUNT)
 	    aux_info[auxp->a_type] = auxp;
     }
 
     /* Initialize and relocate ourselves. */
     assert(aux_info[AT_BASE] != NULL);
     init_rtld((caddr_t) aux_info[AT_BASE]->a_un.a_ptr, aux_info);
 
     __progname = obj_rtld.path;
     argv0 = argv[0] != NULL ? argv[0] : "(null)";
     environ = env;
     main_argc = argc;
     main_argv = argv;
 
     if (aux_info[AT_CANARY] != NULL &&
 	aux_info[AT_CANARY]->a_un.a_ptr != NULL) {
 	    i = aux_info[AT_CANARYLEN]->a_un.a_val;
 	    if (i > sizeof(__stack_chk_guard))
 		    i = sizeof(__stack_chk_guard);
 	    memcpy(__stack_chk_guard, aux_info[AT_CANARY]->a_un.a_ptr, i);
     } else {
 	mib[0] = CTL_KERN;
 	mib[1] = KERN_ARND;
 
 	len = sizeof(__stack_chk_guard);
 	if (sysctl(mib, 2, __stack_chk_guard, &len, NULL, 0) == -1 ||
 	    len != sizeof(__stack_chk_guard)) {
 		/* If sysctl was unsuccessful, use the "terminator canary". */
 		((unsigned char *)(void *)__stack_chk_guard)[0] = 0;
 		((unsigned char *)(void *)__stack_chk_guard)[1] = 0;
 		((unsigned char *)(void *)__stack_chk_guard)[2] = '\n';
 		((unsigned char *)(void *)__stack_chk_guard)[3] = 255;
 	}
     }
 
     trust = !issetugid();
 
-    md_abi_variant_hook(aux_info);
+/*  md_abi_variant_hook(aux_info); */
 
     ld_bind_now = getenv(_LD("BIND_NOW"));
     /* 
      * If the process is tainted, then we un-set the dangerous environment
      * variables.  The process will be marked as tainted until setuid(2)
      * is called.  If any child process calls setuid(2) we do not want any
      * future processes to honor the potentially un-safe variables.
      */
     if (!trust) {
 	if (unsetenv(_LD("PRELOAD")) || unsetenv(_LD("LIBMAP")) ||
 	    unsetenv(_LD("LIBRARY_PATH")) || unsetenv(_LD("LIBRARY_PATH_FDS")) ||
 	    unsetenv(_LD("LIBMAP_DISABLE")) ||
 	    unsetenv(_LD("DEBUG")) || unsetenv(_LD("ELF_HINTS_PATH")) ||
 	    unsetenv(_LD("LOADFLTR")) || unsetenv(_LD("LIBRARY_PATH_RPATH"))) {
 		_rtld_error("environment corrupt; aborting");
 		rtld_die();
 	}
     }
     ld_debug = getenv(_LD("DEBUG"));
     libmap_disable = getenv(_LD("LIBMAP_DISABLE")) != NULL;
     libmap_override = getenv(_LD("LIBMAP"));
     ld_library_path = getenv(_LD("LIBRARY_PATH"));
     ld_library_dirs = getenv(_LD("LIBRARY_PATH_FDS"));
     ld_preload = getenv(_LD("PRELOAD"));
     ld_elf_hints_path = getenv(_LD("ELF_HINTS_PATH"));
     ld_loadfltr = getenv(_LD("LOADFLTR")) != NULL;
     library_path_rpath = getenv(_LD("LIBRARY_PATH_RPATH"));
     if (library_path_rpath != NULL) {
 	    if (library_path_rpath[0] == 'y' ||
 		library_path_rpath[0] == 'Y' ||
 		library_path_rpath[0] == '1')
 		    ld_library_path_rpath = true;
 	    else
 		    ld_library_path_rpath = false;
     }
     dangerous_ld_env = libmap_disable || (libmap_override != NULL) ||
 	(ld_library_path != NULL) || (ld_preload != NULL) ||
 	(ld_elf_hints_path != NULL) || ld_loadfltr;
     ld_tracing = getenv(_LD("TRACE_LOADED_OBJECTS"));
     ld_utrace = getenv(_LD("UTRACE"));
 
     if ((ld_elf_hints_path == NULL) || strlen(ld_elf_hints_path) == 0)
 	ld_elf_hints_path = ld_elf_hints_default;
 
     if (ld_debug != NULL && *ld_debug != '\0')
 	debug = 1;
     dbg("%s is initialized, base address = %p", __progname,
 	(caddr_t) aux_info[AT_BASE]->a_un.a_ptr);
     dbg("RTLD dynamic = %p", obj_rtld.dynamic);
     dbg("RTLD pltgot  = %p", obj_rtld.pltgot);
 
     dbg("initializing thread locks");
     lockdflt_init();
 
     /*
      * Load the main program, or process its program header if it is
      * already loaded.
      */
     if (aux_info[AT_EXECFD] != NULL) {	/* Load the main program. */
 	int fd = aux_info[AT_EXECFD]->a_un.a_val;
 	dbg("loading main program");
 	obj_main = map_object(fd, argv0, NULL);
 	close(fd);
 	if (obj_main == NULL)
 	    rtld_die();
 	max_stack_flags = obj_main->stack_flags;	/* 'obj' is unset here */
     } else {				/* Main program already loaded. */
 	const Elf_Phdr *phdr;
 	int phnum;
 	caddr_t entry;
 
 	dbg("processing main program's program header");
 	assert(aux_info[AT_PHDR] != NULL);
 	phdr = (const Elf_Phdr *) aux_info[AT_PHDR]->a_un.a_ptr;
 	assert(aux_info[AT_PHNUM] != NULL);
 	phnum = aux_info[AT_PHNUM]->a_un.a_val;
 	assert(aux_info[AT_PHENT] != NULL);
 	assert(aux_info[AT_PHENT]->a_un.a_val == sizeof(Elf_Phdr));
 	assert(aux_info[AT_ENTRY] != NULL);
 	entry = (caddr_t) aux_info[AT_ENTRY]->a_un.a_ptr;
 	if ((obj_main = digest_phdr(phdr, phnum, entry, argv0)) == NULL)
 	    rtld_die();
     }
 
     if (aux_info[AT_EXECPATH] != 0) {
 	    char *kexecpath;
 	    char buf[MAXPATHLEN];
 
 	    kexecpath = aux_info[AT_EXECPATH]->a_un.a_ptr;
 	    dbg("AT_EXECPATH %p %s", kexecpath, kexecpath);
 	    if (kexecpath[0] == '/')
 		    obj_main->path = kexecpath;
 	    else if (getcwd(buf, sizeof(buf)) == NULL ||
 		     strlcat(buf, "/", sizeof(buf)) >= sizeof(buf) ||
 		     strlcat(buf, kexecpath, sizeof(buf)) >= sizeof(buf))
 		    obj_main->path = xstrdup(argv0);
 	    else
 		    obj_main->path = xstrdup(buf);
     } else {
 	    dbg("No AT_EXECPATH");
 	    obj_main->path = xstrdup(argv0);
     }
     dbg("obj_main path %s", obj_main->path);
     obj_main->mainprog = true;
 
     if (aux_info[AT_STACKPROT] != NULL &&
       aux_info[AT_STACKPROT]->a_un.a_val != 0)
 	    stack_prot = aux_info[AT_STACKPROT]->a_un.a_val;
 
 #ifndef COMPAT_32BIT
     /*
      * Get the actual dynamic linker pathname from the executable if
      * possible.  (It should always be possible.)  That ensures that
      * gdb will find the right dynamic linker even if a non-standard
      * one is being used.
      */
     if (obj_main->interp != NULL &&
       strcmp(obj_main->interp, obj_rtld.path) != 0) {
 	free(obj_rtld.path);
 	obj_rtld.path = xstrdup(obj_main->interp);
         __progname = obj_rtld.path;
     }
 #endif
 
     digest_dynamic(obj_main, 0);
     dbg("%s valid_hash_sysv %d valid_hash_gnu %d dynsymcount %d",
 	obj_main->path, obj_main->valid_hash_sysv, obj_main->valid_hash_gnu,
 	obj_main->dynsymcount);
 
     linkmap_add(obj_main);
     linkmap_add(&obj_rtld);
 
     /* Link the main program into the list of objects. */
     *obj_tail = obj_main;
     obj_tail = &obj_main->next;
     obj_count++;
     obj_loads++;
 
     /* Initialize a fake symbol for resolving undefined weak references. */
     sym_zero.st_info = ELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
     sym_zero.st_shndx = SHN_UNDEF;
     sym_zero.st_value = -(uintptr_t)obj_main->relocbase;
 
     if (!libmap_disable)
         libmap_disable = (bool)lm_init(libmap_override);
 
     dbg("loading LD_PRELOAD libraries");
     if (load_preload_objects() == -1)
 	rtld_die();
     preload_tail = obj_tail;
 
     dbg("loading needed objects");
     if (load_needed_objects(obj_main, 0) == -1)
 	rtld_die();
 
     /* Make a list of all objects loaded at startup. */
     last_interposer = obj_main;
     for (obj = obj_list;  obj != NULL;  obj = obj->next) {
 	if (obj->z_interpose && obj != obj_main) {
 	    objlist_put_after(&list_main, last_interposer, obj);
 	    last_interposer = obj;
 	} else {
 	    objlist_push_tail(&list_main, obj);
 	}
     	obj->refcount++;
     }
 
     dbg("checking for required versions");
     if (rtld_verify_versions(&list_main) == -1 && !ld_tracing)
 	rtld_die();
 
     if (ld_tracing) {		/* We're done */
 	trace_loaded_objects(obj_main);
 	exit(0);
     }
 
     if (getenv(_LD("DUMP_REL_PRE")) != NULL) {
        dump_relocations(obj_main);
        exit (0);
     }
 
     /*
      * Processing tls relocations requires having the tls offsets
      * initialized.  Prepare offsets before starting initial
      * relocation processing.
      */
     dbg("initializing initial thread local storage offsets");
     STAILQ_FOREACH(entry, &list_main, link) {
 	/*
 	 * Allocate all the initial objects out of the static TLS
 	 * block even if they didn't ask for it.
 	 */
 	allocate_tls_offset(entry->obj);
     }
 
     if (relocate_objects(obj_main,
       ld_bind_now != NULL && *ld_bind_now != '\0',
       &obj_rtld, SYMLOOK_EARLY, NULL) == -1)
 	rtld_die();
 
     dbg("doing copy relocations");
     if (do_copy_relocations(obj_main) == -1)
 	rtld_die();
 
     if (getenv(_LD("DUMP_REL_POST")) != NULL) {
        dump_relocations(obj_main);
        exit (0);
     }
 
     /*
      * Setup TLS for main thread.  This must be done after the
      * relocations are processed, since tls initialization section
      * might be the subject for relocations.
      */
     dbg("initializing initial thread local storage");
     allocate_initial_tls(obj_list);
 
     dbg("initializing key program variables");
     set_program_var("__progname", argv[0] != NULL ? basename(argv[0]) : "");
     set_program_var("environ", env);
     set_program_var("__elf_aux_vector", aux);
 
     /* Make a list of init functions to call. */
     objlist_init(&initlist);
     initlist_add_objects(obj_list, preload_tail, &initlist);
 
     r_debug_state(NULL, &obj_main->linkmap); /* say hello to gdb! */
 
     map_stacks_exec(NULL);
 
     dbg("resolving ifuncs");
     if (resolve_objects_ifunc(obj_main,
       ld_bind_now != NULL && *ld_bind_now != '\0', SYMLOOK_EARLY,
       NULL) == -1)
 	rtld_die();
 
     if (!obj_main->crt_no_init) {
 	/*
 	 * Make sure we don't call the main program's init and fini
 	 * functions for binaries linked with old crt1 which calls
 	 * _init itself.
 	 */
 	obj_main->init = obj_main->fini = (Elf_Addr)NULL;
 	obj_main->preinit_array = obj_main->init_array =
 	    obj_main->fini_array = (Elf_Addr)NULL;
     }
 
     wlock_acquire(rtld_bind_lock, &lockstate);
     if (obj_main->crt_no_init)
 	preinit_main();
     objlist_call_init(&initlist, &lockstate);
     _r_debug_postinit(&obj_main->linkmap);
     objlist_clear(&initlist);
     dbg("loading filtees");
     for (obj = obj_list->next; obj != NULL; obj = obj->next) {
 	if (ld_loadfltr || obj->z_loadfltr)
 	    load_filtees(obj, 0, &lockstate);
     }
     lock_release(rtld_bind_lock, &lockstate);
 
     dbg("transferring control to program entry point = %p", obj_main->entry);
 
     /* Return the exit procedure and the program entry point. */
     *exit_proc = rtld_exit;
     *objp = obj_main;
     return (func_ptr_type) obj_main->entry;
 }
 
 void *
 rtld_resolve_ifunc(const Obj_Entry *obj, const Elf_Sym *def)
 {
 	void *ptr;
 	Elf_Addr target;
 
 	ptr = (void *)make_function_pointer(def, obj);
 	target = ((Elf_Addr (*)(void))ptr)();
 	return ((void *)target);
 }
 
 /*
  * Bind one jump slot of obj.  reloff is a byte offset into the object's
  * PLT relocation table (pltrel if present, otherwise pltrela).  The symbol
  * referenced by that relocation is looked up, its address is written to
  * the slot through reloc_jmpslot(), and the value reloc_jmpslot() returns
  * is handed back to the caller (the lazy binding trampoline, per the
  * comment further down).  A failed symbol lookup ends in rtld_die().
  * The whole operation runs with rtld_bind_lock held.
  */
 Elf_Addr
 _rtld_bind(Obj_Entry *obj, Elf_Size reloff)
 {
     const Elf_Rel *rel;
     const Elf_Sym *def;
     const Obj_Entry *defobj;
     Elf_Addr *where;
     Elf_Addr target;
     RtldLockState lockstate;
 
     rlock_acquire(rtld_bind_lock, &lockstate);
     /*
      * A non-zero return means control came back here through a jump to
      * lockstate.env; the lock is upgraded before carrying on.
      * NOTE(review): presumably the jump is issued by code that needs the
      * lock held exclusively -- confirm against lock_upgrade().
      */
     if (sigsetjmp(lockstate.env, 0) != 0)
 	    lock_upgrade(rtld_bind_lock, &lockstate);
     /* This function itself only reads r_offset and r_info through rel. */
     if (obj->pltrel)
 	rel = (const Elf_Rel *) ((caddr_t) obj->pltrel + reloff);
     else
 	rel = (const Elf_Rel *) ((caddr_t) obj->pltrela + reloff);
 
     /* Location of the slot to fill, and the definition it should refer to. */
     where = (Elf_Addr *) (obj->relocbase + rel->r_offset);
     def = find_symdef(ELF_R_SYM(rel->r_info), obj, &defobj, true, NULL,
 	&lockstate);
     if (def == NULL)
 	rtld_die();
     /* For STT_GNU_IFUNC symbols the target is whatever the resolver returns. */
     if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC)
 	target = (Elf_Addr)rtld_resolve_ifunc(defobj, def);
     else
 	target = (Elf_Addr)(defobj->relocbase + def->st_value);
 
     dbg("\"%s\" in \"%s\" ==> %p in \"%s\"",
       defobj->strtab + def->st_name, basename(obj->path),
       (void *)target, basename(defobj->path));
 
     /*
      * Write the new contents for the jmpslot. Note that depending on
      * architecture, the value which we need to return back to the
      * lazy binding trampoline may or may not be the target
      * address. The value returned from reloc_jmpslot() is the value
      * that the trampoline needs.
      */
     target = reloc_jmpslot(where, target, defobj, obj, rel);
     lock_release(rtld_bind_lock, &lockstate);
     return target;
 }
 
 /*
  * Error reporting function.  Use it like printf.  It formats the message
  * into a static buffer and records that buffer as the current error
  * message, so that the next call to dlerror() will return it.
  */
 void
 _rtld_error(const char *fmt, ...)
 {
     static char msgbuf[512];
     va_list args;
 
     va_start(args, fmt);
     rtld_vsnprintf(msgbuf, sizeof(msgbuf), fmt, args);
     va_end(args);
     error_message = msgbuf;
 }
 
 /*
  * Duplicate the pending error message.  Returns a copy allocated with
  * xstrdup(), or NULL when no message is pending.
  */
 static char *
 errmsg_save(void)
 {
     if (error_message == NULL)
 	return NULL;
     return xstrdup(error_message);
 }
 
 /*
  * Reinstate an error message captured earlier by errmsg_save().  A NULL
  * argument clears the current message; otherwise the saved text becomes
  * the current message again and the saved copy is freed.
  */
 static void
 errmsg_restore(char *saved_msg)
 {
     if (saved_msg != NULL) {
 	_rtld_error("%s", saved_msg);
 	free(saved_msg);
 	return;
     }
     error_message = NULL;
 }
 
 /*
  * Return the last component of a pathname: the text following the final
  * '/' in `name', or `name' itself when it contains no '/'.  The result
  * points into the caller's string; nothing is allocated.
  */
 static const char *
 basename(const char *name)
 {
     const char *slash;
 
     slash = strrchr(name, '/');
     if (slash == NULL)
 	return name;
     return slash + 1;
 }
 
 /*
  * Cached uname(3) result supplying the $OSNAME, $OSREL and $PLATFORM
  * substitutions.  origin_subst() fills it on first use; an empty sysname
  * means it has not been fetched yet.
  */
 static struct utsname uts;
 
 /*
  * Replace every occurrence of the keyword `kw' in the string `real'.
  *
  * When `obj' is non-NULL the replacement text is the object's resolved
  * origin directory (obj->origin_path) and `subst' is ignored; otherwise
  * `subst' is used.  The result is always something the caller owns:
  * either a newly allocated string, or `real' itself when nothing was
  * substituted and `may_free' says that `real' already belongs to the
  * caller.  When a substitution is performed and `may_free' is true,
  * `real' is freed.
  */
 static char *
 origin_subst_one(Obj_Entry *obj, char *real, const char *kw,
     const char *subst, bool may_free)
 {
 	char *scan, *hit, *out, *dst;
 	int kw_len, subst_len, nhits, old_len, new_len;
 
 	kw_len = strlen(kw);
 
 	/*
 	 * First pass: count the keyword occurrences so that the result
 	 * can be allocated in one go.
 	 */
 	nhits = 0;
 	scan = real;
 	while ((hit = strstr(scan, kw)) != NULL) {
 		nhits++;
 		scan = hit + kw_len;
 	}
 
 	/*
 	 * Nothing to replace, or the origin could not be resolved.  In
 	 * the latter case the string is handed back unsubstituted: there
 	 * is nothing more reasonable to do, and the caller will fail to
 	 * find the library anyway.
 	 */
 	if (nhits == 0 || (obj != NULL && !obj_resolve_origin(obj)))
 		return (may_free ? real : xstrdup(real));
 	if (obj != NULL)
 		subst = obj->origin_path;
 
 	/* Size and allocate the result. */
 	subst_len = strlen(subst);
 	old_len = strlen(real);
 	new_len = old_len + (subst_len - kw_len) * nhits;
 	out = xmalloc(new_len + 1);
 
 	/*
 	 * Second pass: copy the text in front of each keyword, then the
 	 * replacement, keeping the output terminated as we go.
 	 */
 	dst = out;
 	*dst = '\0';
 	scan = real;
 	while ((hit = strstr(scan, kw)) != NULL) {
 		memcpy(dst, scan, hit - scan);
 		dst += hit - scan;
 		memcpy(dst, subst, subst_len);
 		dst += subst_len;
 		*dst = '\0';
 		scan = hit + kw_len;
 	}
 
 	/* Append whatever follows the last keyword. */
 	strcat(dst, scan);
 	if (may_free)
 		free(real);
 	return (out);
 }
 
 /*
  * Expand the $ORIGIN, $OSNAME, $OSREL and $PLATFORM tokens in `real' and
  * return the result as a newly allocated string.  No expansion is done
  * (a plain copy is returned) when `obj' is NULL or the process is not
  * trusted.  Returns NULL, with the error message set, if uname() fails.
  */
 static char *
 origin_subst(Obj_Entry *obj, char *real)
 {
 	char *path;
 
 	if (obj == NULL || !trust)
 		return (xstrdup(real));
 	/* Fetch the system identification once and keep it cached. */
 	if (uts.sysname[0] == '\0' && uname(&uts) != 0) {
 		_rtld_error("utsname failed: %d", errno);
 		return (NULL);
 	}
 	/*
 	 * The first step must not free the caller's string; each later
 	 * step consumes the intermediate result of the previous one.
 	 */
 	path = origin_subst_one(obj, real, "$ORIGIN", NULL, false);
 	path = origin_subst_one(NULL, path, "$OSNAME", uts.sysname, true);
 	path = origin_subst_one(NULL, path, "$OSREL", uts.release, true);
 	path = origin_subst_one(NULL, path, "$PLATFORM", uts.machine, true);
 	return (path);
 }
 
 /*
  * Terminate the process: print the pending dlerror() message (or
  * "Fatal error" when there is none) followed by a newline on standard
  * error, then exit with status 1.
  */
 void
 rtld_die(void)
 {
     const char *msg;
 
     msg = dlerror();
     rtld_fdputstr(STDERR_FILENO, msg != NULL ? msg : "Fatal error");
     rtld_fdputchar(STDERR_FILENO, '\n');
     _exit(1);
 }
 
 /*
  * Process a shared object's DYNAMIC section, and save the important
  * information in its Obj_Entry structure.
  *
  * This is the first of two passes.  Entries whose value is an offset
  * into the string table (DT_RPATH, DT_SONAME, DT_RUNPATH) are not
  * interpreted here; a pointer to each such entry (or NULL when absent)
  * is handed back through dyn_rpath, dyn_soname and dyn_runpath.
  *
  * `early' is non-zero when debug output must be suppressed; it only
  * gates the dbg() calls below.
  */
 static void
 digest_dynamic1(Obj_Entry *obj, int early, const Elf_Dyn **dyn_rpath,
     const Elf_Dyn **dyn_soname, const Elf_Dyn **dyn_runpath)
 {
     const Elf_Dyn *dynp;
     /* Tail pointers so that each list keeps the order of the entries. */
     Needed_Entry **needed_tail = &obj->needed;
     Needed_Entry **needed_filtees_tail = &obj->needed_filtees;
     Needed_Entry **needed_aux_filtees_tail = &obj->needed_aux_filtees;
     const Elf_Hashelt *hashtab;
     const Elf32_Word *hashval;
     Elf32_Word bkt, nmaskwords;
     int bloom_size32;
     /* PLT relocations are taken to be Elf_Rel unless DT_PLTREL says so. */
     int plttype = DT_REL;
 
     *dyn_rpath = NULL;
     *dyn_soname = NULL;
     *dyn_runpath = NULL;
 
     obj->bind_now = false;
     for (dynp = obj->dynamic;  dynp->d_tag != DT_NULL;  dynp++) {
 	switch (dynp->d_tag) {
 
 	case DT_REL:
 	    obj->rel = (const Elf_Rel *) (obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_RELSZ:
 	    obj->relsize = dynp->d_un.d_val;
 	    break;
 
 	case DT_RELENT:
 	    assert(dynp->d_un.d_val == sizeof(Elf_Rel));
 	    break;
 
 	case DT_JMPREL:
 	    /* Stored as Elf_Rel for now; fixed up after the loop. */
 	    obj->pltrel = (const Elf_Rel *)
 	      (obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_PLTRELSZ:
 	    obj->pltrelsize = dynp->d_un.d_val;
 	    break;
 
 	case DT_RELA:
 	    obj->rela = (const Elf_Rela *) (obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_RELASZ:
 	    obj->relasize = dynp->d_un.d_val;
 	    break;
 
 	case DT_RELAENT:
 	    assert(dynp->d_un.d_val == sizeof(Elf_Rela));
 	    break;
 
 	case DT_PLTREL:
 	    plttype = dynp->d_un.d_val;
 	    assert(dynp->d_un.d_val == DT_REL || plttype == DT_RELA);
 	    break;
 
 	case DT_SYMTAB:
 	    obj->symtab = (const Elf_Sym *)
 	      (obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_SYMENT:
 	    assert(dynp->d_un.d_val == sizeof(Elf_Sym));
 	    break;
 
 	case DT_STRTAB:
 	    obj->strtab = (const char *) (obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_STRSZ:
 	    obj->strsize = dynp->d_un.d_val;
 	    break;
 
 	case DT_VERNEED:
 	    obj->verneed = (const Elf_Verneed *) (obj->relocbase +
 		dynp->d_un.d_val);
 	    break;
 
 	case DT_VERNEEDNUM:
 	    obj->verneednum = dynp->d_un.d_val;
 	    break;
 
 	case DT_VERDEF:
 	    obj->verdef = (const Elf_Verdef *) (obj->relocbase +
 		dynp->d_un.d_val);
 	    break;
 
 	case DT_VERDEFNUM:
 	    obj->verdefnum = dynp->d_un.d_val;
 	    break;
 
 	case DT_VERSYM:
 	    obj->versyms = (const Elf_Versym *)(obj->relocbase +
 		dynp->d_un.d_val);
 	    break;
 
 	case DT_HASH:
 	    {
 		/*
 		 * Table layout as read here: word 0 is the bucket count,
 		 * word 1 the chain count, followed by the buckets and
 		 * then the chains.
 		 */
 		hashtab = (const Elf_Hashelt *)(obj->relocbase +
 		    dynp->d_un.d_ptr);
 		obj->nbuckets = hashtab[0];
 		obj->nchains = hashtab[1];
 		obj->buckets = hashtab + 2;
 		obj->chains = obj->buckets + obj->nbuckets;
 		obj->valid_hash_sysv = obj->nbuckets > 0 && obj->nchains > 0 &&
 		  obj->buckets != NULL;
 	    }
 	    break;
 
 	case DT_GNU_HASH:
 	    {
 		/*
 		 * Header words as read here: bucket count, index of the
 		 * first hashed symbol, number of bloom filter words and
 		 * the second bloom shift.  The bloom filter words follow
 		 * the header, then the buckets, then the chains.
 		 */
 		hashtab = (const Elf_Hashelt *)(obj->relocbase +
 		    dynp->d_un.d_ptr);
 		obj->nbuckets_gnu = hashtab[0];
 		obj->symndx_gnu = hashtab[1];
 		nmaskwords = hashtab[2];
 		/* Bloom filter size expressed in 32-bit words. */
 		bloom_size32 = (__ELF_WORD_SIZE / 32) * nmaskwords;
 		obj->maskwords_bm_gnu = nmaskwords - 1;
 		obj->shift2_gnu = hashtab[3];
 		obj->bloom_gnu = (Elf_Addr *) (hashtab + 4);
 		obj->buckets_gnu = hashtab + 4 + bloom_size32;
 		/*
 		 * Biased so that the chain array can be indexed directly
 		 * by symbol number.
 		 */
 		obj->chain_zero_gnu = obj->buckets_gnu + obj->nbuckets_gnu -
 		  obj->symndx_gnu;
 		/* Number of bitmask words is required to be power of 2 */
 		obj->valid_hash_gnu = powerof2(nmaskwords) &&
 		    obj->nbuckets_gnu > 0 && obj->buckets_gnu != NULL;
 	    }
 	    break;
 
 	case DT_NEEDED:
 	    /* The name is kept as the raw d_val; it is not resolved here. */
 	    if (!obj->rtld) {
 		Needed_Entry *nep = NEW(Needed_Entry);
 		nep->name = dynp->d_un.d_val;
 		nep->obj = NULL;
 		nep->next = NULL;
 
 		*needed_tail = nep;
 		needed_tail = &nep->next;
 	    }
 	    break;
 
 	case DT_FILTER:
 	    if (!obj->rtld) {
 		Needed_Entry *nep = NEW(Needed_Entry);
 		nep->name = dynp->d_un.d_val;
 		nep->obj = NULL;
 		nep->next = NULL;
 
 		*needed_filtees_tail = nep;
 		needed_filtees_tail = &nep->next;
 	    }
 	    break;
 
 	case DT_AUXILIARY:
 	    if (!obj->rtld) {
 		Needed_Entry *nep = NEW(Needed_Entry);
 		nep->name = dynp->d_un.d_val;
 		nep->obj = NULL;
 		nep->next = NULL;
 
 		*needed_aux_filtees_tail = nep;
 		needed_aux_filtees_tail = &nep->next;
 	    }
 	    break;
 
 	case DT_PLTGOT:
 	    obj->pltgot = (Elf_Addr *) (obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_TEXTREL:
 	    obj->textrel = true;
 	    break;
 
 	case DT_SYMBOLIC:
 	    obj->symbolic = true;
 	    break;
 
 	case DT_RPATH:
 	    /*
 	     * We have to wait until later to process this, because we
 	     * might not have gotten the address of the string table yet.
 	     */
 	    *dyn_rpath = dynp;
 	    break;
 
 	case DT_SONAME:
 	    *dyn_soname = dynp;
 	    break;
 
 	case DT_RUNPATH:
 	    *dyn_runpath = dynp;
 	    break;
 
 	case DT_INIT:
 	    obj->init = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_PREINIT_ARRAY:
 	    obj->preinit_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_PREINIT_ARRAYSZ:
 	    /* The tag gives a size in bytes; store an element count. */
 	    obj->preinit_array_num = dynp->d_un.d_val / sizeof(Elf_Addr);
 	    break;
 
 	case DT_INIT_ARRAY:
 	    obj->init_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_INIT_ARRAYSZ:
 	    obj->init_array_num = dynp->d_un.d_val / sizeof(Elf_Addr);
 	    break;
 
 	case DT_FINI:
 	    obj->fini = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_FINI_ARRAY:
 	    obj->fini_array = (Elf_Addr)(obj->relocbase + dynp->d_un.d_ptr);
 	    break;
 
 	case DT_FINI_ARRAYSZ:
 	    obj->fini_array_num = dynp->d_un.d_val / sizeof(Elf_Addr);
 	    break;
 
 	/*
 	 * Don't process DT_DEBUG on MIPS as the dynamic section
 	 * is mapped read-only. DT_MIPS_RLD_MAP is used instead.
 	 */
 
 	case DT_DEBUG:
 	    /* The entry is patched in place, so the section must be writable. */
 	    if (!obj->writable_dynamic)
 		break;
 	    if (!early)
 		dbg("Filling in DT_DEBUG entry");
 	    ((Elf_Dyn*)dynp)->d_un.d_ptr = (Elf_Addr) &r_debug;
 	    break;
 
 	case DT_FLAGS:
 		if (dynp->d_un.d_val & DF_ORIGIN)
 		    obj->z_origin = true;
 		if (dynp->d_un.d_val & DF_SYMBOLIC)
 		    obj->symbolic = true;
 		if (dynp->d_un.d_val & DF_TEXTREL)
 		    obj->textrel = true;
 		if (dynp->d_un.d_val & DF_BIND_NOW)
 		    obj->bind_now = true;
 		/*if (dynp->d_un.d_val & DF_STATIC_TLS)
 		    ;*/
 	    break;
 #ifdef __mips__
 	case DT_MIPS_LOCAL_GOTNO:
 		obj->local_gotno = dynp->d_un.d_val;
 		break;
 
 	case DT_MIPS_SYMTABNO:
 		obj->symtabno = dynp->d_un.d_val;
 		break;
 
 	case DT_MIPS_GOTSYM:
 		obj->gotsym = dynp->d_un.d_val;
 		break;
 
 	case DT_MIPS_RLD_MAP:
 		*((Elf_Addr *)(dynp->d_un.d_ptr)) = (Elf_Addr) &r_debug;
 		break;
 #endif
 
 #ifdef __powerpc64__
 	case DT_PPC64_GLINK:
 		obj->glink = (Elf_Addr) (obj->relocbase + dynp->d_un.d_ptr);
 		break;
 #endif
 
 	case DT_FLAGS_1:
 		if (dynp->d_un.d_val & DF_1_NOOPEN)
 		    obj->z_noopen = true;
 		if (dynp->d_un.d_val & DF_1_ORIGIN)
 		    obj->z_origin = true;
 		if (dynp->d_un.d_val & DF_1_GLOBAL)
 		    obj->z_global = true;
 		if (dynp->d_un.d_val & DF_1_BIND_NOW)
 		    obj->bind_now = true;
 		if (dynp->d_un.d_val & DF_1_NODELETE)
 		    obj->z_nodelete = true;
 		if (dynp->d_un.d_val & DF_1_LOADFLTR)
 		    obj->z_loadfltr = true;
 		if (dynp->d_un.d_val & DF_1_INTERPOSE)
 		    obj->z_interpose = true;
 		if (dynp->d_un.d_val & DF_1_NODEFLIB)
 		    obj->z_nodeflib = true;
 	    break;
 
 	default:
 	    if (!early) {
 		dbg("Ignoring d_tag %ld = %#lx", (long)dynp->d_tag,
 		    (long)dynp->d_tag);
 	    }
 	    break;
 	}
     }
 
     obj->traced = false;
 
     /*
      * DT_JMPREL was recorded in pltrel/pltrelsize above regardless of the
      * relocation flavour.  If DT_PLTREL announced Elf_Rela entries, move
      * the pointer and size over to the rela fields.
      */
     if (plttype == DT_RELA) {
 	obj->pltrela = (const Elf_Rela *) obj->pltrel;
 	obj->pltrel = NULL;
 	obj->pltrelasize = obj->pltrelsize;
 	obj->pltrelsize = 0;
     }
 
     /* Determine size of dynsym table (equal to nchains of sysv hash) */
     if (obj->valid_hash_sysv)
 	obj->dynsymcount = obj->nchains;
     else if (obj->valid_hash_gnu) {
 	/*
 	 * The GNU table has no explicit symbol count.  Walk every
 	 * non-empty bucket's chain, counting entries up to and including
 	 * the one whose low bit is set, then add the symbols below
 	 * symndx_gnu, which are not covered by the chains.
 	 */
 	obj->dynsymcount = 0;
 	for (bkt = 0; bkt < obj->nbuckets_gnu; bkt++) {
 	    if (obj->buckets_gnu[bkt] == 0)
 		continue;
 	    hashval = &obj->chain_zero_gnu[obj->buckets_gnu[bkt]];
 	    do
 		obj->dynsymcount++;
 	    while ((*hashval++ & 1u) == 0);
 	}
 	obj->dynsymcount += obj->symndx_gnu;
     }
 }
 
 /*
  * Make sure obj->origin_path holds the absolute directory of obj->path,
  * computing it with rtld_dirname_abs() on first use.
  *
  * Returns true when origin_path is valid.  On failure the buffer is
  * released and origin_path is reset to NULL, so that a later call retries
  * the resolution instead of reporting success with a buffer whose
  * contents are undefined.
  */
 static bool
 obj_resolve_origin(Obj_Entry *obj)
 {
 
 	if (obj->origin_path != NULL)
 		return (true);
 	obj->origin_path = xmalloc(PATH_MAX);
 	if (rtld_dirname_abs(obj->path, obj->origin_path) == -1) {
 		free(obj->origin_path);
 		obj->origin_path = NULL;
 		return (false);
 	}
 	return (true);
 }
 
 /*
  * Second pass over the DYNAMIC data: handle the entries whose value is an
  * offset into the string table, which digest_dynamic1() only recorded.
  *
  * An object flagged z_origin must have a resolvable origin, otherwise the
  * process is terminated.  DT_RUNPATH takes precedence over DT_RPATH: when
  * a runpath is present, rpath is left untouched.  The chosen path is run
  * through origin_subst() before it is stored, and the DT_SONAME string,
  * if any, is registered with object_add_name().
  */
 static void
 digest_dynamic2(Obj_Entry *obj, const Elf_Dyn *dyn_rpath,
     const Elf_Dyn *dyn_soname, const Elf_Dyn *dyn_runpath)
 {
 	char *raw;
 
 	if (obj->z_origin) {
 		if (!obj_resolve_origin(obj))
 			rtld_die();
 	}
 
 	if (dyn_runpath != NULL) {
 		raw = (char *)obj->strtab + dyn_runpath->d_un.d_val;
 		obj->runpath = origin_subst(obj, raw);
 	} else if (dyn_rpath != NULL) {
 		raw = (char *)obj->strtab + dyn_rpath->d_un.d_val;
 		obj->rpath = origin_subst(obj, raw);
 	}
 
 	if (dyn_soname != NULL)
 		object_add_name(obj, obj->strtab + dyn_soname->d_un.d_val);
 }
 
 /*
  * Digest an object's DYNAMIC section in one go by running both passes:
  * digest_dynamic1() to scan the entries, then digest_dynamic2() to
  * handle the string-table based ones it handed back.
  */
 static void
 digest_dynamic(Obj_Entry *obj, int early)
 {
 	const Elf_Dyn *rpath_ent, *soname_ent, *runpath_ent;
 
 	digest_dynamic1(obj, early, &rpath_ent, &soname_ent, &runpath_ent);
 	digest_dynamic2(obj, rpath_ent, soname_ent, runpath_ent);
 }
 
 /*
  * Process a shared object's program header.  This is used only for the
  * main program, when the kernel has already loaded the main program
  * into memory before calling the dynamic linker.  It creates and
  * returns an Obj_Entry structure.
  *
  * phdr/phnum describe the in-memory program header table, `entry' is
  * the program entry point and `path' is used for error messages only.
  * Returns NULL, with the error message set, if the program has no
  * PT_LOAD segment.
  */
 static Obj_Entry *
 digest_phdr(const Elf_Phdr *phdr, int phnum, caddr_t entry, const char *path)
 {
     Obj_Entry *obj;
     const Elf_Phdr *phlimit = phdr + phnum;
     const Elf_Phdr *ph;
     Elf_Addr note_start, note_end;
     int nsegs = 0;
 
     obj = obj_new();
     /*
      * PT_PHDR gives the link-time address of the header table; its
      * distance from the actual address is the relocation base.
      */
     for (ph = phdr;  ph < phlimit;  ph++) {
 	if (ph->p_type != PT_PHDR)
 	    continue;
 
 	obj->phdr = phdr;
 	obj->phsize = ph->p_memsz;
 	obj->relocbase = (caddr_t)phdr - ph->p_vaddr;
 	break;
     }
 
     /* Used unless a PT_GNU_STACK header says otherwise. */
     obj->stack_flags = PF_X | PF_R | PF_W;
 
     for (ph = phdr;  ph < phlimit;  ph++) {
 	switch (ph->p_type) {
 
 	case PT_INTERP:
 	    obj->interp = (const char *)(ph->p_vaddr + obj->relocbase);
 	    break;
 
 	case PT_LOAD:
 	    if (nsegs == 0) {	/* First load segment */
 		obj->vaddrbase = trunc_page(ph->p_vaddr);
 		obj->mapbase = obj->vaddrbase + obj->relocbase;
 		obj->textsize = round_page(ph->p_vaddr + ph->p_memsz) -
 		  obj->vaddrbase;
 	    }
 	    /*
 	     * Every load segment extends the mapping, so the value left
 	     * behind by the last one is the total size.  Doing this for
 	     * the first segment as well keeps mapsize correct for a
 	     * program consisting of a single PT_LOAD segment.
 	     */
 	    obj->mapsize = round_page(ph->p_vaddr + ph->p_memsz) -
 	      obj->vaddrbase;
 	    nsegs++;
 	    break;
 
 	case PT_DYNAMIC:
 	    /* p_flags holds ELF segment flags (PF_*), not mmap PROT_* bits. */
 	    if (ph->p_flags & PF_W)
 		obj->writable_dynamic = true;
 	    obj->dynamic = (const Elf_Dyn *)(ph->p_vaddr + obj->relocbase);
 	    break;
 
 	case PT_TLS:
 	    obj->tlsindex = 1;
 	    obj->tlssize = ph->p_memsz;
 	    obj->tlsalign = ph->p_align;
 	    obj->tlsinitsize = ph->p_filesz;
 	    obj->tlsinit = (void*)(ph->p_vaddr + obj->relocbase);
 	    break;
 
 	case PT_GNU_STACK:
 	    obj->stack_flags = ph->p_flags;
 	    break;
 
 	case PT_GNU_RELRO:
 	    obj->relro_page = obj->relocbase + trunc_page(ph->p_vaddr);
 	    obj->relro_size = round_page(ph->p_memsz);
 	    break;
 
 	case PT_NOTE:
 	    note_start = (Elf_Addr)obj->relocbase + ph->p_vaddr;
 	    note_end = note_start + ph->p_filesz;
 	    digest_notes(obj, note_start, note_end);
 	    break;
 	}
     }
     if (nsegs < 1) {
 	/*
 	 * NOTE(review): obj is not released on this path; confirm that
 	 * callers treat the failure as fatal.
 	 */
 	_rtld_error("%s: too few PT_LOAD segments", path);
 	return NULL;
     }
 
     obj->entry = entry;
     return obj;
 }
 
 /*
  * Scan the ELF notes lying in [note_start, note_end) and record the ones
  * carrying the FreeBSD vendor name in `obj': the ABI tag sets obj->osrel
  * and the "no init" tag sets obj->crt_no_init.  Notes of any other size,
  * type or vendor are skipped.
  */
 void
 digest_notes(Obj_Entry *obj, Elf_Addr note_start, Elf_Addr note_end)
 {
 	const Elf_Note *note, *next;
 	const char *vendor;
 	uintptr_t desc;
 	bool is_abi, is_noinit;
 
 	for (note = (const Elf_Note *)note_start; (Elf_Addr)note < note_end;
 	    note = next) {
 		/* The name and the descriptor are each padded to 4 bytes. */
 		next = (const Elf_Note *)((const char *)(note + 1) +
 		    roundup2(note->n_namesz, sizeof(Elf32_Addr)) +
 		    roundup2(note->n_descsz, sizeof(Elf32_Addr)));
 
 		if (note->n_namesz != sizeof(NOTE_FREEBSD_VENDOR) ||
 		    note->n_descsz != sizeof(int32_t))
 			continue;
 
 		is_abi = note->n_type == NT_FREEBSD_ABI_TAG;
 		is_noinit = note->n_type == NT_FREEBSD_NOINIT_TAG;
 		if (!is_abi && !is_noinit)
 			continue;
 
 		/* The vendor name immediately follows the note header. */
 		vendor = (const char *)(note + 1);
 		if (strncmp(NOTE_FREEBSD_VENDOR, vendor,
 		    sizeof(NOTE_FREEBSD_VENDOR)) != 0)
 			continue;
 
 		if (is_abi) {
 			/* FreeBSD osrel note */
 			desc = (uintptr_t)(note + 1);
 			desc += roundup2(note->n_namesz, sizeof(Elf32_Addr));
 			obj->osrel = *(const int32_t *)(desc);
 			dbg("note osrel %d", obj->osrel);
 		} else {
 			/* FreeBSD 'crt does not call init' note */
 			obj->crt_no_init = true;
 			dbg("note crt_no_init");
 		}
 	}
 }
 
 /*
  * Validate a handle supplied by the application.  The handle is accepted
  * only if it is the address of an object on obj_list whose refcount and
  * dl_refcount are both non-zero.  Returns the object, or NULL with the
  * error message set.
  */
 static Obj_Entry *
 dlcheck(void *handle)
 {
     Obj_Entry *cand;
 
     cand = obj_list;
     while (cand != NULL && cand != (Obj_Entry *) handle)
 	cand = cand->next;
 
     if (cand != NULL && cand->refcount != 0 && cand->dl_refcount != 0)
 	return cand;
 
     _rtld_error("Invalid shared object handle %p", handle);
     return NULL;
 }
 
 /*
  * Test-and-set membership in a donelist.  Returns true if `obj' is
  * already recorded in the list; otherwise records it (room permitting)
  * and returns false.
  */
 static bool
 donelist_check(DoneList *dlp, const Obj_Entry *obj)
 {
     unsigned int idx;
 
     idx = 0;
     while (idx < dlp->num_used) {
 	if (dlp->objs[idx] == obj)
 	    return true;
 	idx++;
     }
 
     /*
      * The list is sized for every loaded object, so there should always
      * be room.  Should more objects have appeared since it was allocated
      * (which would indicate broken thread locking), the object is simply
      * not recorded rather than written past the end of the array.
      */
     if (dlp->num_used >= dlp->num_alloc)
 	return false;
     dlp->objs[dlp->num_used] = obj;
     dlp->num_used++;
     return false;
 }
 
 /*
  * Symbol name hash used by the SysV ELF hash table.  The algorithm is
  * fixed by the System V ABI and must not be altered.
  */
 unsigned long
 elf_hash(const char *name)
 {
     const unsigned char *cp;
     unsigned long hash, top;
 
     hash = 0;
     for (cp = (const unsigned char *) name; *cp != '\0'; cp++) {
 	hash = (hash << 4) + *cp;
 	/* Fold the top nibble back into the low bits, then drop it. */
 	top = hash & 0xf0000000;
 	if (top != 0)
 	    hash ^= top >> 24;
 	hash &= ~top;
     }
     return hash;
 }
 
 /*
  * Symbol name hash used by the GNU hash table: the Daniel J. Bernstein
  * hash (start at 5381, multiply by 33 and add each byte), truncated to
  * 32 bits in case the arithmetic is done in a wider type.
  */
 static uint32_t
 gnu_hash(const char *s)
 {
 	uint32_t hash = 5381;
 	unsigned char ch;
 
 	while ((ch = *s++) != '\0')
 		hash = hash * 33 + ch;
 	return (hash & 0xffffffff);
 }
 
 
 /*
  * Find the library with the given name, and return its full pathname.
  * The returned string is dynamically allocated.  Generates an error
  * message and returns NULL if the library cannot be found.
  *
  * If the second argument is non-NULL, then it refers to an already-
  * loaded shared object, whose library search path will be searched.
  *
  * If a library is successfully located via LD_LIBRARY_PATH_FDS, its
  * descriptor (which is close-on-exec) will be passed out via the third
  * argument.
  *
  * A name containing a '/' is not searched for at all: it is returned
  * after token substitution by origin_subst(), except that a relative
  * pathname is rejected when the process is not trusted.
  *
  * The search order is:
  *   DT_RPATH in the referencing file _unless_ DT_RUNPATH is present (1)
  *   DT_RPATH of the main object if DSO without defined DT_RUNPATH (1)
  *   LD_LIBRARY_PATH
  *   DT_RUNPATH in the referencing file
  *   LD_LIBRARY_PATH_FDS directories
  *   ldconfig hints (if -z nodefaultlib, filter out default library directories
  *	 from list)
  *   /lib:/usr/lib _unless_ the referencing file is linked with -z nodefaultlib
  *
  * (1) Handled in digest_dynamic2 - rpath left NULL if runpath defined.
  */
 static char *
 find_library(const char *xname, const Obj_Entry *refobj, int *fdp)
 {
     char *pathname;
     char *name;
     bool nodeflib, objgiven;
 
     objgiven = refobj != NULL;
     if (strchr(xname, '/') != NULL) {	/* Hard coded pathname */
 	if (xname[0] != '/' && !trust) {
 	    _rtld_error("Absolute pathname required for shared object \"%s\"",
 	      xname);
 	    return NULL;
 	}
 	return (origin_subst(__DECONST(Obj_Entry *, refobj),
 	  __DECONST(char *, xname)));
     }
 
     /*
      * Let lm_find() substitute another name for the library when it has
      * one for this referencing object; otherwise search for the name as
      * given.
      */
     if (libmap_disable || !objgiven ||
 	(name = lm_find(refobj->path, xname)) == NULL)
 	name = (char *)xname;
 
     dbg(" Searching for \"%s\"", name);
 
     /*
      * If refobj->rpath != NULL, then refobj->runpath is NULL.  Fall
      * back to pre-conforming behaviour if user requested so with
      * LD_LIBRARY_PATH_RPATH environment variable and ignore -z
      * nodeflib.
      */
     if (objgiven && refobj->rpath != NULL && ld_library_path_rpath) {
 	/* Pre-conforming order: LD_LIBRARY_PATH is consulted before DT_RPATH. */
 	if ((pathname = search_library_path(name, ld_library_path)) != NULL ||
 	  (refobj != NULL &&
 	  (pathname = search_library_path(name, refobj->rpath)) != NULL) ||
 	  (pathname = search_library_pathfds(name, ld_library_dirs, fdp)) != NULL ||
           (pathname = search_library_path(name, gethints(false))) != NULL ||
 	  (pathname = search_library_path(name, ld_standard_library_path)) != NULL)
 	    return (pathname);
     } else {
 	/* Each location is tried in turn; the first hit ends the search. */
 	nodeflib = objgiven ? refobj->z_nodeflib : false;
 	if ((objgiven &&
 	  (pathname = search_library_path(name, refobj->rpath)) != NULL) ||
 	  (objgiven && refobj->runpath == NULL && refobj != obj_main &&
 	  (pathname = search_library_path(name, obj_main->rpath)) != NULL) ||
 	  (pathname = search_library_path(name, ld_library_path)) != NULL ||
 	  (objgiven &&
 	  (pathname = search_library_path(name, refobj->runpath)) != NULL) ||
 	  (pathname = search_library_pathfds(name, ld_library_dirs, fdp)) != NULL ||
 	  (pathname = search_library_path(name, gethints(nodeflib))) != NULL ||
 	  (objgiven && !nodeflib &&
 	  (pathname = search_library_path(name, ld_standard_library_path)) != NULL))
 	    return (pathname);
     }
 
     /* Not found anywhere; name the requesting object when it is known. */
     if (objgiven && refobj->path != NULL) {
 	_rtld_error("Shared object \"%s\" not found, required by \"%s\"",
 	  name, basename(refobj->path));
     } else {
 	_rtld_error("Shared object \"%s\" not found", name);
     }
     return NULL;
 }
 
 /*
  * Resolve symbol number `symnum' of the referencing object `refobj'.
  *
  * Returns the defining symbol and stores the object that defines it in
  * *defobj_out, or returns NULL when no definition exists (an error
  * message is set unless the referencing object is rtld itself).  When
  * `cache' is non-NULL it is consulted first and updated on success.
  * `flags' and `lockstate' are passed on to the symbol lookup.
  */
 const Elf_Sym *
 find_symdef(unsigned long symnum, const Obj_Entry *refobj,
     const Obj_Entry **defobj_out, int flags, SymCache *cache,
     RtldLockState *lockstate)
 {
     const Elf_Sym *ref, *found;
     const Obj_Entry *owner;
     const char *symname;
     SymLook req;
 
     if (symnum >= refobj->dynsymcount)
 	return NULL;	/* Bad object */
 
     /* A previous lookup of this symbol may already be cached. */
     if (cache != NULL && cache[symnum].sym != NULL) {
 	*defobj_out = cache[symnum].obj;
 	return cache[symnum].sym;
     }
 
     ref = refobj->symtab + symnum;
     symname = refobj->strtab + ref->st_name;
     found = NULL;
     owner = NULL;
 
     if (ELF_ST_BIND(ref->st_info) == STB_LOCAL) {
 	/*
 	 * A local symbol always binds to the instance in the referencing
 	 * module, which we already have.  Skipping the lookup is faster
 	 * and also copes with local symbols that are missing from the
 	 * hash table, as has been seen with the ia64 toolchain.
 	 */
 	found = ref;
 	owner = refobj;
     } else {
 	if (ELF_ST_TYPE(ref->st_info) == STT_SECTION) {
 	    _rtld_error("%s: Bogus symbol table entry %lu", refobj->path,
 		symnum);
 	}
 	symlook_init(&req, symname);
 	req.flags = flags;
 	req.ventry = fetch_ventry(refobj, symnum);
 	req.lockstate = lockstate;
 	if (symlook_default(&req, refobj) == 0) {
 	    found = req.sym_out;
 	    owner = req.defobj_out;
 	}
     }
 
     /* An unresolved weak reference is treated as having the value zero. */
     if (found == NULL && ELF_ST_BIND(ref->st_info) == STB_WEAK) {
 	found = &sym_zero;
 	owner = obj_main;
     }
 
     if (found == NULL) {
 	if (refobj != &obj_rtld)
 	    _rtld_error("%s: Undefined symbol \"%s\"", refobj->path, symname);
 	return NULL;
     }
 
     *defobj_out = owner;
     /* Remember the result so that later lookups can be skipped. */
     if (cache != NULL) {
 	cache[symnum].sym = found;
 	cache[symnum].obj = owner;
     }
     return found;
 }
 
 /*
  * Return the search path from the ldconfig hints file, reading it if
  * necessary.  If nostdlib is true, then the default search paths are
  * not added to result.
  *
  * Both the full list and the filtered list are computed once and kept
  * for later calls.
  *
  * Returns NULL if there are problems with the hints file,
  * or if the search path there is empty.
  */
 static const char *
 gethints(bool nostdlib)
 {
 	static char *hints, *filtered_path;
 	struct elfhints_hdr hdr;
 	struct fill_search_info_args sargs, hargs;
 	struct dl_serinfo smeta, hmeta, *SLPinfo, *hintinfo;
 	struct dl_serpath *SLPpath, *hintpath;
 	char *p;
 	unsigned int SLPndx, hintndx, fndx, fcount;
 	int fd;
 	size_t flen;
 	bool skip;
 
 	/* First call, read the hints file */
 	if (hints == NULL) {
 		/* Keep from trying again in case the hints file is bad. */
 		hints = "";
 
 		if ((fd = open(ld_elf_hints_path, O_RDONLY | O_CLOEXEC)) == -1)
 			return (NULL);
 		if (read(fd, &hdr, sizeof hdr) != sizeof hdr ||
 		    hdr.magic != ELFHINTS_MAGIC ||
 		    hdr.version != 1) {
 			close(fd);
 			return (NULL);
 		}
 		p = xmalloc(hdr.dirlistlen + 1);
 		/*
 		 * The directory list is used as a C string from here on,
 		 * so a list that is not NUL-terminated is rejected like
 		 * any other malformed hints file.
 		 */
 		if (lseek(fd, hdr.strtab + hdr.dirlist, SEEK_SET) == -1 ||
 		    read(fd, p, hdr.dirlistlen + 1) !=
 		    (ssize_t)hdr.dirlistlen + 1 ||
 		    p[hdr.dirlistlen] != '\0') {
 			free(p);
 			close(fd);
 			return (NULL);
 		}
 		hints = p;
 		close(fd);
 	}
 
 	/*
 	 * If the caller agreed to receive the list which includes the
 	 * default paths, we are done.  Otherwise, if we have not yet
 	 * calculated the filtered result, do it now.
 	 */
 	if (!nostdlib)
 		return (hints[0] != '\0' ? hints : NULL);
 	if (filtered_path != NULL)
 		goto filt_ret;
 
 	/*
 	 * Obtain the list of all configured search paths, and the
 	 * list of the default paths.
 	 *
 	 * Everything below works from the static `hints' string.  The
 	 * locals `p' and `hdr' are only meaningful during the call that
 	 * read the hints file, which need not be this one.
 	 *
 	 * First estimate the size of the results.
 	 */
 	smeta.dls_size = __offsetof(struct dl_serinfo, dls_serpath);
 	smeta.dls_cnt = 0;
 	hmeta.dls_size = __offsetof(struct dl_serinfo, dls_serpath);
 	hmeta.dls_cnt = 0;
 
 	sargs.request = RTLD_DI_SERINFOSIZE;
 	sargs.serinfo = &smeta;
 	hargs.request = RTLD_DI_SERINFOSIZE;
 	hargs.serinfo = &hmeta;
 
 	path_enumerate(ld_standard_library_path, fill_search_info, &sargs);
 	path_enumerate(hints, fill_search_info, &hargs);
 
 	SLPinfo = xmalloc(smeta.dls_size);
 	hintinfo = xmalloc(hmeta.dls_size);
 
 	/*
 	 * Next fetch both sets of paths.
 	 */
 	sargs.request = RTLD_DI_SERINFO;
 	sargs.serinfo = SLPinfo;
 	sargs.serpath = &SLPinfo->dls_serpath[0];
 	sargs.strspace = (char *)&SLPinfo->dls_serpath[smeta.dls_cnt];
 
 	hargs.request = RTLD_DI_SERINFO;
 	hargs.serinfo = hintinfo;
 	hargs.serpath = &hintinfo->dls_serpath[0];
 	hargs.strspace = (char *)&hintinfo->dls_serpath[hmeta.dls_cnt];
 
 	path_enumerate(ld_standard_library_path, fill_search_info, &sargs);
 	path_enumerate(hints, fill_search_info, &hargs);
 
 	/*
 	 * Now calculate the difference between two sets, by excluding
 	 * standard paths from the full set.  The result is a subset of
 	 * the directories in `hints' joined by ':', so it can never be
 	 * longer than `hints' itself.
 	 */
 	fndx = 0;
 	fcount = 0;
 	filtered_path = xmalloc(strlen(hints) + 1);
 	hintpath = &hintinfo->dls_serpath[0];
 	for (hintndx = 0; hintndx < hmeta.dls_cnt; hintndx++, hintpath++) {
 		skip = false;
 		SLPpath = &SLPinfo->dls_serpath[0];
 		/*
 		 * Check each standard path against current.
 		 */
 		for (SLPndx = 0; SLPndx < smeta.dls_cnt; SLPndx++, SLPpath++) {
 			/* matched, skip the path */
 			if (!strcmp(hintpath->dls_name, SLPpath->dls_name)) {
 				skip = true;
 				break;
 			}
 		}
 		if (skip)
 			continue;
 		/*
 		 * Not matched against any standard path, add the path
 		 * to result. Separate consecutive paths with ':'.
 		 */
 		if (fcount > 0) {
 			filtered_path[fndx] = ':';
 			fndx++;
 		}
 		fcount++;
 		flen = strlen(hintpath->dls_name);
 		/* The terminator is written once, after the loop. */
 		memcpy(filtered_path + fndx, hintpath->dls_name, flen);
 		fndx += flen;
 	}
 	filtered_path[fndx] = '\0';
 
 	free(SLPinfo);
 	free(hintinfo);
 
 filt_ret:
 	return (filtered_path[0] != '\0' ? filtered_path : NULL);
 }
 
 /*
  * Build the dependency DAG rooted at `root', unless that has been done
  * already.  Every object reachable through the `needed' lists is added
  * to root->dagmembers exactly once, and `root' is recorded in the
  * dldags list of each member, the root included.
  */
 static void
 init_dag(Obj_Entry *root)
 {
     const Objlist_Entry *member;
     const Needed_Entry *dep;
     DoneList seen;
 
     if (root->dag_inited)
 	return;
     donelist_init(&seen);
 
     /* Root object belongs to own DAG. */
     objlist_push_tail(&root->dldags, root);
     objlist_push_tail(&root->dagmembers, root);
     donelist_check(&seen, root);
 
     /*
      * The member list grows at its tail while it is being walked, so
      * the dependencies of newly added objects are visited in turn,
      * which yields a breadth-first ordering.
      */
     STAILQ_FOREACH(member, &root->dagmembers, link) {
 	dep = member->obj->needed;
 	while (dep != NULL) {
 	    if (dep->obj != NULL && !donelist_check(&seen, dep->obj)) {
 		objlist_push_tail(&dep->obj->dldags, root);
 		objlist_push_tail(&root->dagmembers, dep->obj);
 	    }
 	    dep = dep->next;
 	}
     }
     root->dag_inited = true;
 }
 
 /*
  * Apply the DF_1_NODELETE and DF_1_GLOBAL flags of every member of the
  * DAG rooted at `root'.  Each flagged object gets a DAG of its own:
  *
  * - a nodelete object has the references of its DAG raised, once;
  * - a global object is appended to list_global, once, and needs its
  *   DAG for symbol lookups in symlook_global() to work.
  */
 static void
 process_z(Obj_Entry *root)
 {
 	const Objlist_Entry *member;
 	Obj_Entry *cur;
 
 	STAILQ_FOREACH(member, &root->dagmembers, link) {
 		cur = member->obj;
 		if (cur != NULL) {
 			if (cur->z_nodelete && !cur->ref_nodel) {
 				dbg("obj %s -z nodelete", cur->path);
 				init_dag(cur);
 				ref_dag(cur);
 				cur->ref_nodel = true;
 			}
 			if (cur->z_global &&
 			    objlist_find(&list_global, cur) == NULL) {
 				dbg("obj %s -z global", cur->path);
 				objlist_push_tail(&list_global, cur);
 				init_dag(cur);
 			}
 		}
 	}
 }
 /*
  * Initialize the dynamic linker.  The argument is the address at which
  * the dynamic linker has been mapped into memory.  The primary task of
  * this function is to relocate the dynamic linker.
  *
  * Until the relocation has been done only local variables are used, so
  * the object is assembled in a temporary on the stack and copied into
  * obj_rtld afterwards.  aux_info is the auxiliary vector indexed by
  * entry type.
  */
 static void
 init_rtld(caddr_t mapbase, Elf_Auxinfo **aux_info)
 {
     Obj_Entry objtmp;	/* Temporary rtld object */
     /*
      * Start out as NULL: these are passed to digest_dynamic2() below even
      * when RTLD_IS_DYNAMIC() is false and digest_dynamic1() never ran.
      */
     const Elf_Dyn *dyn_rpath = NULL;
     const Elf_Dyn *dyn_soname = NULL;
     const Elf_Dyn *dyn_runpath = NULL;
 
 #ifdef RTLD_INIT_PAGESIZES_EARLY
     /* The page size is required by the dynamic memory allocator. */
     init_pagesizes(aux_info);
 #endif
 
     /*
      * Conjure up an Obj_Entry structure for the dynamic linker.
      *
      * The "path" member can't be initialized yet because string constants
      * cannot yet be accessed. Below we will set it correctly.
      */
     memset(&objtmp, 0, sizeof(objtmp));
     objtmp.path = NULL;
     objtmp.rtld = true;
     objtmp.mapbase = mapbase;
 #ifdef PIC
     objtmp.relocbase = mapbase;
 #endif
     if (RTLD_IS_DYNAMIC()) {
 	objtmp.dynamic = rtld_dynamic(&objtmp);
 	digest_dynamic1(&objtmp, 1, &dyn_rpath, &dyn_soname, &dyn_runpath);
 	assert(objtmp.needed == NULL);
 #if !defined(__mips__)
 	/* MIPS has a bogus DT_TEXTREL. */
 	assert(!objtmp.textrel);
 #endif
 
 	/*
 	 * Temporarily put the dynamic linker entry into the object list, so
 	 * that symbols can be found.
 	 */
 
 	relocate_objects(&objtmp, true, &objtmp, 0, NULL);
     }
 
     /* Initialize the object list. */
     obj_tail = &obj_list;
 
     /* Now that non-local variables can be accessed, copy out obj_rtld. */
     memcpy(&obj_rtld, &objtmp, sizeof(obj_rtld));
 
 #ifndef RTLD_INIT_PAGESIZES_EARLY
     /* The page size is required by the dynamic memory allocator. */
     init_pagesizes(aux_info);
 #endif
 
     if (aux_info[AT_OSRELDATE] != NULL)
 	    osreldate = aux_info[AT_OSRELDATE]->a_un.a_val;
 
     digest_dynamic2(&obj_rtld, dyn_rpath, dyn_soname, dyn_runpath);
 
     /* Replace the path with a dynamically allocated copy. */
     obj_rtld.path = xstrdup(ld_path_rtld);
 
     r_debug.r_brk = r_debug_state;
     r_debug.r_state = RT_CONSISTENT;
 }
 
 /*
  * Retrieve the array of supported page sizes.  The kernel provides the page
  * sizes in increasing order.
  *
  * Sources are tried in this order: the AT_PAGESIZES/AT_PAGESIZESLEN aux
  * vector entries, the "hw.pagesizes" sysctl, the AT_PAGESZ aux vector
  * entry and finally the CTL_HW/HW_PAGESIZE sysctl.  The result is
  * published through "pagesizes" and "npagesizes".
  */
 static void
 init_pagesizes(Elf_Auxinfo **aux_info)
 {
 	static size_t psa[MAXPAGESIZES];
 	int mib[2];
 	size_t len, size;
 	bool query;
 
 	if (aux_info[AT_PAGESIZES] == NULL ||
 	    aux_info[AT_PAGESIZESLEN] == NULL) {
 		/* No complete table in the aux vector; fill psa[] instead. */
 		query = true;
 		len = 2;
 		if (sysctlnametomib("hw.pagesizes", mib, &len) == 0) {
 			size = sizeof(psa);
 		} else if (aux_info[AT_PAGESZ] != NULL) {
 			/* As a fallback, use the base page size only. */
 			size = sizeof(psa[0]);
 			psa[0] = aux_info[AT_PAGESZ]->a_un.a_val;
 			query = false;
 		} else {
 			/* Last resort: ask for the base page size. */
 			size = sizeof(psa[0]);
 			mib[0] = CTL_HW;
 			mib[1] = HW_PAGESIZE;
 			len = 2;
 		}
 		if (query && sysctl(mib, len, psa, &size, NULL, 0) == -1) {
 			_rtld_error("sysctl for hw.pagesize(s) failed");
 			rtld_die();
 		}
 		pagesizes = psa;
 	} else {
 		size = aux_info[AT_PAGESIZESLEN]->a_un.a_val;
 		pagesizes = aux_info[AT_PAGESIZES]->a_un.a_ptr;
 	}
 
 	/* Convert the byte count to entries and trim trailing zeros. */
 	npagesizes = size / sizeof(pagesizes[0]);
 	while (npagesizes > 0 && pagesizes[npagesizes - 1] == 0)
 		npagesizes--;
 }
 
 /*
  * Helper for initlist_add_objects(); not meant to be called directly.
  * Feeds every object on a needed list to initlist_add_objects(), which
  * recurses into their own needed objects.  Entries later in the list are
  * handled before earlier ones.  The write lock must be held when this
  * function is called.
  */
 static void
 initlist_add_neededs(Needed_Entry *needed, Objlist *list)
 {
     Obj_Entry *dep;
 
     /* Handle the remainder of the list first. */
     if (needed->next != NULL) {
 	initlist_add_neededs(needed->next, list);
     }
 
     /* Then this entry, provided it refers to an object. */
     dep = needed->obj;
     if (dep == NULL)
 	return;
     initlist_add_objects(dep, &dep->next, list);
 }
 
 /*
  * Walk the DAGs rooted at the objects from "obj" up to "tail" and append
  * the objects that have init functions to "list".  Successor objects and
  * needed objects are visited before the object itself is appended, so
  * that dependencies end up ahead of their dependents.  Objects with fini
  * functions are pushed onto the head of the global "list_fini", which
  * yields the opposite order there.  The write lock must be held when
  * this function is called.
  */
 static void
 initlist_add_objects(Obj_Entry *obj, Obj_Entry **tail, Objlist *list)
 {
     bool has_init, has_fini;
 
     /* Skip objects already visited in this scan or already initialized. */
     if (obj->init_scanned || obj->init_done)
 	return;
     obj->init_scanned = true;
 
     /* Successor objects come first, until the tail is reached. */
     if (tail != &obj->next)
 	initlist_add_objects(obj->next, tail, list);
 
     /* Then everything this object needs, filtees included. */
     if (obj->needed != NULL)
 	initlist_add_neededs(obj->needed, list);
     if (obj->needed_filtees != NULL)
 	initlist_add_neededs(obj->needed_filtees, list);
     if (obj->needed_aux_filtees != NULL)
 	initlist_add_neededs(obj->needed_aux_filtees, list);
 
     /* Queue the object for initialization if it has anything to run. */
     has_init = obj->preinit_array != (Elf_Addr)NULL ||
       obj->init != (Elf_Addr)NULL || obj->init_array != (Elf_Addr)NULL;
     if (has_init)
 	objlist_push_tail(list, obj);
 
     /* Register for finalization, in reverse order, at most once. */
     has_fini = obj->fini != (Elf_Addr)NULL ||
       obj->fini_array != (Elf_Addr)NULL;
     if (has_fini && !obj->on_fini_list) {
 	objlist_push_head(&list_fini, obj);
 	obj->on_fini_list = true;
     }
 }
 
 /*
  * Fallback definition, used only when nothing earlier defined
  * FPTR_TARGET: a function pointer is converted to an address with a
  * plain cast.
  */
 #ifndef FPTR_TARGET
 #define FPTR_TARGET(f)	((Elf_Addr) (f))
 #endif
 
 /*
  * Release a list of filtee entries: dlclose() every filtee object that
  * was loaded, then free the list nodes themselves.
  */
 static void
 free_needed_filtees(Needed_Entry *n)
 {
     Needed_Entry *cur, *following;
 
     /* First pass: close the loaded filtees and forget them. */
     cur = n;
     while (cur != NULL) {
 	if (cur->obj != NULL) {
 	    dlclose(cur->obj);
 	    cur->obj = NULL;
 	}
 	cur = cur->next;
     }
 
     /* Second pass: free the nodes, saving each successor beforehand. */
     cur = n;
     while (cur != NULL) {
 	following = cur->next;
 	free(cur);
 	cur = following;
     }
 }
 
 /*
  * Drop both filtee lists of an object (regular and auxiliary) and mark
  * the filtees as not loaded.
  */
 static void
 unload_filtees(Obj_Entry *obj)
 {
     Needed_Entry *filtees;
 
     filtees = obj->needed_filtees;
     free_needed_filtees(filtees);
     obj->needed_filtees = NULL;
 
     filtees = obj->needed_aux_filtees;
     free_needed_filtees(filtees);
     obj->needed_aux_filtees = NULL;
 
     obj->filtees_loaded = false;
 }
 
 /*
  * Load every filtee named on the "needed" list of "obj" and record the
  * resulting object (possibly NULL) in the list entry.  Filtees are opened
  * RTLD_LOCAL; binding is RTLD_NOW when ld_loadfltr or the object's
  * z_loadfltr flag is set, and RTLD_LAZY otherwise.
  */
 static void
 load_filtee1(Obj_Entry *obj, Needed_Entry *needed, int flags,
     RtldLockState *lockstate)
 {
     int mode;
 
     while (needed != NULL) {
 	mode = RTLD_LOCAL;
 	if (ld_loadfltr || obj->z_loadfltr)
 	    mode |= RTLD_NOW;
 	else
 	    mode |= RTLD_LAZY;
 	needed->obj = dlopen_object(obj->strtab + needed->name, -1, obj,
 	  flags, mode, lockstate);
 	needed = needed->next;
     }
 }
 
 /*
  * Load the filtees (regular and auxiliary) of "obj" unless that was
  * already done.  lock_restart_for_upgrade() is called first in every
  * case.
  */
 static void
 load_filtees(Obj_Entry *obj, int flags, RtldLockState *lockstate)
 {
 
     lock_restart_for_upgrade(lockstate);
     if (obj->filtees_loaded)
 	return;
 
     load_filtee1(obj, obj->needed_filtees, flags, lockstate);
     load_filtee1(obj, obj->needed_aux_filtees, flags, lockstate);
     obj->filtees_loaded = true;
 }
 
 /*
  * Load each object named on the "needed" list of "obj", storing the
  * result in the list entry.  RTLD_LO_NOLOAD is masked out of the flags
  * passed down.  A failed load is fatal (-1) unless tracing is active or
  * RTLD_LO_FILTEES is set; otherwise 0 is returned.
  */
 static int
 process_needed(Obj_Entry *obj, Needed_Entry *needed, int flags)
 {
     Obj_Entry *loaded;
 
     while (needed != NULL) {
 	loaded = load_object(obj->strtab + needed->name, -1, obj,
 	  flags & ~RTLD_LO_NOLOAD);
 	needed->obj = loaded;
 	if (loaded == NULL) {
 	    if (!ld_tracing && (flags & RTLD_LO_FILTEES) == 0)
 		return (-1);
 	}
 	needed = needed->next;
     }
     return (0);
 }
 
 /*
  * Starting at "first", walk the object list to its end and load the
  * needed objects of every object on it.  Returns 0 on success.  Returns
  * -1 as soon as process_needed() fails for one of the objects.
  */
 static int
 load_needed_objects(Obj_Entry *first, int flags)
 {
     Obj_Entry *cur;
 
     cur = first;
     while (cur != NULL) {
 	if (process_needed(cur, cur->needed, flags) == -1)
 	    return (-1);
 	cur = cur->next;
     }
     return (0);
 }
 
 /*
  * Load every object named in ld_preload.  Names are separated by any
  * run of space, tab, ':' or ';' characters.  Each loaded object is
  * marked as an interposer.
  *
  * Returns 0 on success (including when ld_preload is not set) and -1 as
  * soon as one object cannot be loaded.
  */
 static int
 load_preload_objects(void)
 {
     char *p = ld_preload;
     Obj_Entry *obj;
     static const char delim[] = " \t:;";
 
     if (p == NULL)
 	return 0;
 
     p += strspn(p, delim);
     while (*p != '\0') {
 	size_t len = strcspn(p, delim);
 	char savech;
 
 	/* Temporarily terminate the current name in place. */
 	savech = p[len];
 	p[len] = '\0';
 	obj = load_object(p, -1, NULL, 0);
 	/*
 	 * Put the delimiter back before looking at the result, so that
 	 * the ld_preload string is left intact on the error path too.
 	 */
 	p[len] = savech;
 	if (obj == NULL)
 	    return -1;	/* XXX - cleanup */
 	obj->z_interpose = true;
 	p += len;
 	p += strspn(p, delim);
     }
     LD_UTRACE(UTRACE_PRELOAD_FINISHED, NULL, NULL, 0, 0, NULL);
     return 0;
 }
 
 /*
  * Return "path" itself, or the placeholder "<unknown>" when it is NULL,
  * so that the result can always be passed to a %s conversion.
  */
 static const char *
 printable_path(const char *path)
 {
 
 	if (path != NULL)
 		return (path);
 	return ("<unknown>");
 }
 
 /*
  * Load a shared object into memory, if it is not already loaded.  The
  * object may be specified by name or by user-supplied file descriptor
  * fd_u. In the latter case, the fd_u descriptor is not closed, but its
  * duplicate is.
  *
  * name:   name to look the object up by, or NULL when only fd_u is given.
  * fd_u:   caller's descriptor, or -1 when the object is located by name.
  * refobj: passed through to find_library().
  * flags:  RTLD_LO_* flags; with RTLD_LO_NOLOAD set, nothing new is mapped
  *         and NULL is returned when the object is not already loaded.
  *
  * Returns a pointer to the Obj_Entry for the object.  Returns NULL
  * on failure.
  */
 static Obj_Entry *
 load_object(const char *name, int fd_u, const Obj_Entry *refobj, int flags)
 {
     Obj_Entry *obj;
     int fd;
     struct stat sb;
     char *path;
 
     fd = -1;
     if (name != NULL) {
 	/* Fast path: an object already on the list answers to this name. */
 	for (obj = obj_list->next;  obj != NULL;  obj = obj->next) {
 	    if (object_match_name(obj, name))
 		return (obj);
 	}
 
 	/* find_library() may also hand back an open descriptor in fd. */
 	path = find_library(name, refobj, &fd);
 	if (path == NULL)
 	    return (NULL);
     } else
 	path = NULL;
 
     if (fd >= 0) {
 	/*
 	 * search_library_pathfds() opens a fresh file descriptor for the
 	 * library, so there is no need to dup().
 	 */
     } else if (fd_u == -1) {
 	/*
 	 * If we didn't find a match by pathname, or the name is not
 	 * supplied, open the file and check again by device and inode.
 	 * This avoids false mismatches caused by multiple links or ".."
 	 * in pathnames.
 	 *
 	 * To avoid a race, we open the file and use fstat() rather than
 	 * using stat().
 	 */
 	if ((fd = open(path, O_RDONLY | O_CLOEXEC | O_VERIFY)) == -1) {
 	    _rtld_error("Cannot open \"%s\"", path);
 	    free(path);
 	    return (NULL);
 	}
     } else {
 	/* Work on a private duplicate so the caller's fd_u stays open. */
 	fd = fcntl(fd_u, F_DUPFD_CLOEXEC, 0);
 	if (fd == -1) {
 	    _rtld_error("Cannot dup fd");
 	    free(path);
 	    return (NULL);
 	}
     }
     if (fstat(fd, &sb) == -1) {
 	_rtld_error("Cannot fstat \"%s\"", printable_path(path));
 	close(fd);
 	free(path);
 	return NULL;
     }
     /* Second chance: the same file may be loaded under another name. */
     for (obj = obj_list->next;  obj != NULL;  obj = obj->next)
 	if (obj->ino == sb.st_ino && obj->dev == sb.st_dev)
 	    break;
     if (obj != NULL && name != NULL) {
 	/* Remember the new name for the already loaded object. */
 	object_add_name(obj, name);
 	free(path);
 	close(fd);
 	return obj;
     }
     if (flags & RTLD_LO_NOLOAD) {
 	free(path);
 	close(fd);
 	return (NULL);
     }
 
     /* First use of this object, so we must map it in */
     obj = do_load_object(fd, name, path, &sb, flags);
     /* On success the object keeps "path"; on failure it is released here. */
     if (obj == NULL)
 	free(path);
     close(fd);
 
     return obj;
 }
 
 /*
  * Map the object open on "fd" into memory and append it to the object
  * list.
  *
  * fd:    open descriptor for the object; not closed here.
  * name:  optional name to record for the object, may be NULL.
  * path:  pathname stored in obj->path; may be NULL, in which case
  *        messages print "<unknown>".
  * sbp:   fstat() result for fd, passed to map_object().
  * flags: RTLD_LO_* flags.
  *
  * Returns the new Obj_Entry, or NULL on failure.
  *
  * NOTE(review): the z_noopen failure path calls obj_free(obj) after
  * obj->path has been set to "path", and load_object() frees "path" when
  * NULL is returned.  Confirm that obj_free() does not release obj->path
  * as well.
  */
 static Obj_Entry *
 do_load_object(int fd, const char *name, char *path, struct stat *sbp,
   int flags)
 {
     Obj_Entry *obj;
     struct statfs fs;
 
     /*
      * but first, make sure that environment variables haven't been
      * used to circumvent the noexec flag on a filesystem.
      */
     if (dangerous_ld_env) {
 	if (fstatfs(fd, &fs) != 0) {
 	    _rtld_error("Cannot fstatfs \"%s\"", printable_path(path));
 	    return NULL;
 	}
 	if (fs.f_flags & MNT_NOEXEC) {
 	    _rtld_error("Cannot execute objects on %s\n", fs.f_mntonname);
 	    return NULL;
 	}
     }
     dbg("loading \"%s\"", printable_path(path));
     obj = map_object(fd, printable_path(path), sbp);
     if (obj == NULL)
         return NULL;
 
     /*
      * If DT_SONAME is present in the object, digest_dynamic2 already
      * added it to the object names.
      */
     if (name != NULL)
 	object_add_name(obj, name);
     obj->path = path;
     digest_dynamic(obj, 0);
     dbg("%s valid_hash_sysv %d valid_hash_gnu %d dynsymcount %d", obj->path,
 	obj->valid_hash_sysv, obj->valid_hash_gnu, obj->dynsymcount);
     /*
      * Objects marked z_noopen are refused for a dlopen() request, unless
      * the request is only tracing.
      */
     if (obj->z_noopen && (flags & (RTLD_LO_DLOPEN | RTLD_LO_TRACE)) ==
       RTLD_LO_DLOPEN) {
 	dbg("refusing to load non-loadable \"%s\"", obj->path);
 	_rtld_error("Cannot dlopen non-loadable %s", obj->path);
 	munmap(obj->mapbase, obj->mapsize);
 	obj_free(obj);
 	return (NULL);
     }
 
     /* Link the object at the tail of the object list and account for it. */
     obj->dlopened = (flags & RTLD_LO_DLOPEN) != 0;
     *obj_tail = obj;
     obj_tail = &obj->next;
     obj_count++;
     obj_loads++;
     linkmap_add(obj);	/* for GDB & dlinfo() */
     max_stack_flags |= obj->stack_flags;
 
     dbg("  %p .. %p: %s", obj->mapbase,
          obj->mapbase + obj->mapsize - 1, obj->path);
     if (obj->textrel)
 	dbg("  WARNING: %s has impure text", obj->path);
     LD_UTRACE(UTRACE_LOAD_OBJECT, obj, obj->mapbase, obj->mapsize, 0,
 	obj->path);    
 
     return obj;
 }
 
 /*
  * Find the object whose mapping [mapbase, mapbase + mapsize) contains
  * "addr".  The whole object list is searched, starting at obj_list.
  * Returns NULL when no mapping contains the address.
  */
 static Obj_Entry *
 obj_from_addr(const void *addr)
 {
     Obj_Entry *cand;
 
     cand = obj_list;
     while (cand != NULL) {
 	if (addr >= (void *)cand->mapbase &&
 	  addr < (void *)(cand->mapbase + cand->mapsize))
 	    return (cand);
 	cand = cand->next;
     }
     return (NULL);
 }
 
 /*
  * Run the preinit array of the main program, in array order.  Slots
  * holding 0 or 1 are skipped.
  */
 static void
 preinit_main(void)
 {
     Elf_Addr *vec;
     Elf_Addr fn;
     int i;
 
     vec = (Elf_Addr *)obj_main->preinit_array;
     if (vec == NULL)
 	return;
 
     for (i = 0; i < obj_main->preinit_array_num; i++) {
 	fn = vec[i];
 	if (fn == 0 || fn == 1)
 	    continue;
 	dbg("calling preinit function for %s at %p", obj_main->path,
 	  (void *)fn);
 	LD_UTRACE(UTRACE_INIT_CALL, obj_main, (void *)fn,
 	  0, 0, obj_main->path);
 	call_init_pointer(obj_main, fn);
     }
 }
 
 /*
  * Call the finalization functions for each of the objects in "list"
  * belonging to the DAG of "root" and referenced once. If NULL "root"
  * is specified, every finalization function will be called regardless
  * of the reference count and the list elements won't be freed. All of
  * the objects are expected to have non-NULL fini functions.
  *
  * The bind lock described by "lockstate" is released around the calls
  * into the fini functions and re-acquired as a write lock afterwards.
  */
 static void
 objlist_call_fini(Objlist *list, Obj_Entry *root, RtldLockState *lockstate)
 {
     Objlist_Entry *elm;
     char *saved_msg;
     Elf_Addr *fini_addr;
     int index;
 
     assert(root == NULL || root->refcount == 1);
 
     /*
      * Preserve the current error message since a fini function might
      * call into the dynamic linker and overwrite it.
      */
     saved_msg = errmsg_save();
     /*
      * Each pass of the outer loop handles at most one list element and
      * then rescans from the head; the loop ends when a scan reaches the
      * end of the list without finding an eligible element.
      */
     do {
 	STAILQ_FOREACH(elm, list, link) {
 	    if (root != NULL && (elm->obj->refcount != 1 ||
 	      objlist_find(&root->dagmembers, elm->obj) == NULL))
 		continue;
 	    /* Remove object from fini list to prevent recursive invocation. */
 	    STAILQ_REMOVE(list, elm, Struct_Objlist_Entry, link);
 	    /*
 	     * XXX: If a dlopen() call references an object while the
 	     * fini function is in progress, we might end up trying to
 	     * unload the referenced object in dlclose() or the object
 	     * won't be unloaded although its fini function has been
 	     * called.
 	     */
 	    lock_release(rtld_bind_lock, lockstate);
 
 	    /*
 	     * It is legal to have both DT_FINI and DT_FINI_ARRAY defined.
 	     * When this happens, DT_FINI_ARRAY is processed first.
 	     */
 	    fini_addr = (Elf_Addr *)elm->obj->fini_array;
 	    if (fini_addr != NULL && elm->obj->fini_array_num > 0) {
 		/* The array is walked from its last slot to its first. */
 		for (index = elm->obj->fini_array_num - 1; index >= 0;
 		  index--) {
 		    /* Slots holding 0 or 1 are skipped. */
 		    if (fini_addr[index] != 0 && fini_addr[index] != 1) {
 			dbg("calling fini function for %s at %p",
 			    elm->obj->path, (void *)fini_addr[index]);
 			LD_UTRACE(UTRACE_FINI_CALL, elm->obj,
 			    (void *)fini_addr[index], 0, 0, elm->obj->path);
 			call_initfini_pointer(elm->obj, fini_addr[index]);
 		    }
 		}
 	    }
 	    if (elm->obj->fini != (Elf_Addr)NULL) {
 		dbg("calling fini function for %s at %p", elm->obj->path,
 		    (void *)elm->obj->fini);
 		LD_UTRACE(UTRACE_FINI_CALL, elm->obj, (void *)elm->obj->fini,
 		    0, 0, elm->obj->path);
 		call_initfini_pointer(elm->obj, elm->obj->fini);
 	    }
 	    wlock_acquire(rtld_bind_lock, lockstate);
 	    /* No need to free anything if process is going down. */
 	    if (root != NULL)
 	    	free(elm);
 	    /*
 	     * We must restart the list traversal after every fini call
 	     * because a dlclose() call from the fini function or from
 	     * another thread might have modified the reference counts.
 	     */
 	    break;
 	}
     } while (elm != NULL);
     errmsg_restore(saved_msg);
 }
 
 /*
  * Call the initialization functions for each of the objects in
  * "list".  All of the objects are expected to have non-NULL init
  * functions.
  *
  * The bind lock described by "lockstate" is released around the calls
  * into the init functions and re-acquired as a write lock afterwards.
  */
 static void
 objlist_call_init(Objlist *list, RtldLockState *lockstate)
 {
     Objlist_Entry *elm;
     Obj_Entry *obj;
     char *saved_msg;
     Elf_Addr *init_addr;
     int index;
 
     /*
      * Clean init_scanned flag so that objects can be rechecked and
      * possibly initialized earlier if any of vectors called below
      * cause the change by using dlopen.
      */
     for (obj = obj_list;  obj != NULL;  obj = obj->next)
 	obj->init_scanned = false;
 
     /*
      * Preserve the current error message since an init function might
      * call into the dynamic linker and overwrite it.
      */
     saved_msg = errmsg_save();
     STAILQ_FOREACH(elm, list, link) {
 	if (elm->obj->init_done) /* Initialized early. */
 	    continue;
 	/*
 	 * Race: other thread might try to use this object before current
 	 * one completes the initialization. Not much can be done here
 	 * without better locking.
 	 */
 	elm->obj->init_done = true;
 	lock_release(rtld_bind_lock, lockstate);
 
         /*
          * It is legal to have both DT_INIT and DT_INIT_ARRAY defined.
          * When this happens, DT_INIT is processed first.
          */
 	if (elm->obj->init != (Elf_Addr)NULL) {
 	    dbg("calling init function for %s at %p", elm->obj->path,
 	        (void *)elm->obj->init);
 	    LD_UTRACE(UTRACE_INIT_CALL, elm->obj, (void *)elm->obj->init,
 	        0, 0, elm->obj->path);
 	    call_initfini_pointer(elm->obj, elm->obj->init);
 	}
 	init_addr = (Elf_Addr *)elm->obj->init_array;
 	if (init_addr != NULL) {
 	    /* The array is walked in order; slots holding 0 or 1 are skipped. */
 	    for (index = 0; index < elm->obj->init_array_num; index++) {
 		if (init_addr[index] != 0 && init_addr[index] != 1) {
 		    dbg("calling init function for %s at %p", elm->obj->path,
 			(void *)init_addr[index]);
 		    LD_UTRACE(UTRACE_INIT_CALL, elm->obj,
 			(void *)init_addr[index], 0, 0, elm->obj->path);
 		    call_init_pointer(elm->obj, init_addr[index]);
 		}
 	    }
 	}
 	wlock_acquire(rtld_bind_lock, lockstate);
     }
     errmsg_restore(saved_msg);
 }
 
 /*
  * Empty an object list, freeing every list element.  The objects that
  * the elements point to are left alone.
  */
 static void
 objlist_clear(Objlist *list)
 {
     Objlist_Entry *head;
 
     while ((head = STAILQ_FIRST(list)) != NULL) {
 	STAILQ_REMOVE_HEAD(list, link);
 	free(head);
     }
 }
 
 /*
  * Return the first element of "list" that refers to "obj", or NULL when
  * the object is not on the list.
  */
 static Objlist_Entry *
 objlist_find(Objlist *list, const Obj_Entry *obj)
 {
     Objlist_Entry *cur, *match;
 
     match = NULL;
     STAILQ_FOREACH(cur, list, link) {
 	if (cur->obj == obj) {
 	    match = cur;
 	    break;
 	}
     }
     return (match);
 }
 
 /*
  * Initialize "list" as an empty object list.
  */
 static void
 objlist_init(Objlist *list)
 {
     STAILQ_INIT(list);
 }
 
 /*
  * Allocate a list element for "obj" and insert it at the head of "list".
  */
 static void
 objlist_push_head(Objlist *list, Obj_Entry *obj)
 {
     Objlist_Entry *entry = NEW(Objlist_Entry);
 
     entry->obj = obj;
     STAILQ_INSERT_HEAD(list, entry, link);
 }
 
 /*
  * Allocate a list element for "obj" and append it to the tail of "list".
  */
 static void
 objlist_push_tail(Objlist *list, Obj_Entry *obj)
 {
     Objlist_Entry *entry = NEW(Objlist_Entry);
 
     entry->obj = obj;
     STAILQ_INSERT_TAIL(list, entry, link);
 }
 
 /*
  * Insert "obj" into "list" directly after the first element that refers
  * to "listobj".  When "listobj" is not on the list, "obj" is appended
  * at the tail instead.
  */
 static void
 objlist_put_after(Objlist *list, Obj_Entry *listobj, Obj_Entry *obj)
 {
 	Objlist_Entry *anchor, *entry;
 
 	/* Locate the insertion point before allocating the new element. */
 	anchor = objlist_find(list, listobj);
 
 	entry = NEW(Objlist_Entry);
 	entry->obj = obj;
 	if (anchor == NULL) {
 		STAILQ_INSERT_TAIL(list, entry, link);
 		return;
 	}
 	STAILQ_INSERT_AFTER(list, anchor, entry, link);
 }
 
 /*
  * Remove the first element referring to "obj" from "list" and free the
  * element.  Nothing happens when the object is not on the list.
  */
 static void
 objlist_remove(Objlist *list, Obj_Entry *obj)
 {
     Objlist_Entry *victim;
 
     victim = objlist_find(list, obj);
     if (victim == NULL)
 	return;
     STAILQ_REMOVE(list, victim, Struct_Objlist_Entry, link);
     free(victim);
 }
 
 /*
  * Relocate every member of the DAG rooted at "root", stopping at the
  * first failure.
  * Returns 0 on success, or -1 on failure.
  */
 
 static int
 relocate_object_dag(Obj_Entry *root, bool bind_now, Obj_Entry *rtldobj,
     int flags, RtldLockState *lockstate)
 {
 	Objlist_Entry *member;
 	int rc;
 
 	rc = 0;
 	STAILQ_FOREACH(member, &root->dagmembers, link) {
 		rc = relocate_object(member->obj, bind_now, rtldobj, flags,
 		    lockstate);
 		if (rc == -1)
 			return (rc);
 	}
 	return (rc);
 }
 
 /*
  * Relocate single object.
  *
  * obj:       object to relocate; nothing is done if it is already
  *            marked as relocated.
  * bind_now:  when true, jump slots are relocated immediately even if the
  *            object itself does not request it.
  * rtldobj:   the dynamic linker's own object, passed to reloc_non_plt().
  * flags:     SYMLOOK_* flags passed to the relocation routines.
  * lockstate: lock state passed to the relocation routines.
  *
  * Returns 0 on success, or -1 on failure.
  */
 static int
 relocate_object(Obj_Entry *obj, bool bind_now, Obj_Entry *rtldobj,
     int flags, RtldLockState *lockstate)
 {
 
 	/* The flag is set up front, so a failed attempt is not retried. */
 	if (obj->relocated)
 		return (0);
 	obj->relocated = true;
 	if (obj != rtldobj)
 		dbg("relocating \"%s\"", obj->path);
 
 	if (obj->symtab == NULL || obj->strtab == NULL ||
 	    !(obj->valid_hash_sysv || obj->valid_hash_gnu)) {
 		_rtld_error("%s: Shared object has no run-time symbol table",
 			    obj->path);
 		return (-1);
 	}
 
 	if (obj->textrel) {
 		/* There are relocations to the write-protected text segment. */
 		if (mprotect(obj->mapbase, obj->textsize,
 		    PROT_READ|PROT_WRITE|PROT_EXEC) == -1) {
 			_rtld_error("%s: Cannot write-enable text segment: %s",
 			    obj->path, rtld_strerror(errno));
 			return (-1);
 		}
 	}
 
 	/* Process the non-PLT non-IFUNC relocations. */
 	if (reloc_non_plt(obj, rtldobj, flags, lockstate))
 		return (-1);
 
 	if (obj->textrel) {	/* Re-protect the text segment. */
 		if (mprotect(obj->mapbase, obj->textsize,
 		    PROT_READ|PROT_EXEC) == -1) {
 			_rtld_error("%s: Cannot write-protect text segment: %s",
 			    obj->path, rtld_strerror(errno));
 			return (-1);
 		}
 	}
 
 	/* Set the special PLT or GOT entries. */
 	init_pltgot(obj);
 
 	/* Process the PLT relocations. */
 	if (reloc_plt(obj) == -1)
 		return (-1);
 	/* Relocate the jump slots if we are doing immediate binding. */
 	if (obj->bind_now || bind_now)
 		if (reloc_jmpslots(obj, flags, lockstate) == -1)
 			return (-1);
 
 	/*
 	 * Process the non-PLT IFUNC relocations.  The relocations are
 	 * processed in two phases, because IFUNC resolvers may
 	 * reference other symbols, which must be readily processed
 	 * before resolvers are called.
 	 */
 	if (obj->non_plt_gnu_ifunc &&
 	    reloc_non_plt(obj, rtldobj, flags | SYMLOOK_IFUNC, lockstate))
 		return (-1);
 
 	/* With all relocations applied, make the relro region read-only. */
 	if (obj->relro_size > 0) {
 		if (mprotect(obj->relro_page, obj->relro_size,
 		    PROT_READ) == -1) {
 			_rtld_error("%s: Cannot enforce relro protection: %s",
 			    obj->path, rtld_strerror(errno));
 			return (-1);
 		}
 	}
 
 	/*
 	 * Set up the magic number and version in the Obj_Entry.  These
 	 * were checked in the crt1.o from the original ElfKit, so we
 	 * set them for backward compatibility.
 	 */
 	obj->magic = RTLD_MAGIC;
 	obj->version = RTLD_VERSION;
 
 	return (0);
 }
 
 /*
  * Relocate newly-loaded shared objects.  "first" is the Obj_Entry of
  * the first such object; it and every object after it on the object
  * list are relocated in list order, stopping at the first failure.
  * Returns 0 on success, or -1 on failure.
  */
 static int
 relocate_objects(Obj_Entry *first, bool bind_now, Obj_Entry *rtldobj,
     int flags, RtldLockState *lockstate)
 {
 	Obj_Entry *cur;
 	int rc;
 
 	rc = 0;
 	cur = first;
 	while (cur != NULL) {
 		rc = relocate_object(cur, bind_now, rtldobj, flags,
 		    lockstate);
 		if (rc == -1)
 			return (rc);
 		cur = cur->next;
 	}
 	return (rc);
 }
 
 /*
  * R_MACHINE_IRELATIVE relocations and jump slots that reference
  * STT_GNU_IFUNC symbols are handled here, after all other relocations.
  * Indirect function resolvers are allowed to call other symbols, so the
  * objects have to be relocated before any resolver is asked for its
  * result.
  *
  * The R_MACHINE_IRELATIVE slots are resolved eagerly rather than through
  * the usual lazy PLT handling, which is consistent with how GNU does it.
  * The ifunc jump slots are resolved here only for immediate binding.
  *
  * Returns 0 on success, or -1 on failure.
  */
 static int
 resolve_object_ifunc(Obj_Entry *obj, bool bind_now, int flags,
     RtldLockState *lockstate)
 {
 
 	if (obj->irelative) {
 		if (reloc_iresolve(obj, lockstate) == -1)
 			return (-1);
 	}
 
 	/* Nothing more to do unless immediate binding is in effect. */
 	if (!obj->bind_now && !bind_now)
 		return (0);
 
 	if (obj->gnu_ifunc && reloc_gnu_ifunc(obj, flags, lockstate) == -1)
 		return (-1);
 	return (0);
 }
 
 /*
  * Apply resolve_object_ifunc() to "first" and to every object after it
  * on the object list.  Returns 0 on success, or -1 at the first failure.
  */
 static int
 resolve_objects_ifunc(Obj_Entry *first, bool bind_now, int flags,
     RtldLockState *lockstate)
 {
 	Obj_Entry *cur;
 
 	cur = first;
 	while (cur != NULL) {
 		if (resolve_object_ifunc(cur, bind_now, flags,
 		    lockstate) == -1)
 			return (-1);
 		cur = cur->next;
 	}
 	return (0);
 }
 
 /*
  * Apply resolve_object_ifunc() to every object on "list".  Returns 0 on
  * success, or -1 at the first failure.
  */
 static int
 initlist_objects_ifunc(Objlist *list, bool bind_now, int flags,
     RtldLockState *lockstate)
 {
 	Objlist_Entry *entry;
 	int rc;
 
 	STAILQ_FOREACH(entry, list, link) {
 		rc = resolve_object_ifunc(entry->obj, bind_now, flags,
 		    lockstate);
 		if (rc == -1)
 			return (-1);
 	}
 	return (0);
 }
 
 /*
  * Cleanup procedure, called (by the atexit mechanism) just before the
  * process exits.  Runs the pending fini functions under the bind lock
  * and shuts down libmap unless it is disabled.
  */
 static void
 rtld_exit(void)
 {
     RtldLockState lockstate;
 
     wlock_acquire(rtld_bind_lock, &lockstate);
     dbg("rtld_exit()");
 
     /*
      * A NULL root makes objlist_call_fini() run every fini function.
      * The items stay on the list, since we are exiting anyway.
      */
     objlist_call_fini(&list_fini, NULL, &lockstate);
 
     if (!libmap_disable) {
 	lm_fini();
     }
     lock_release(rtld_bind_lock, &lockstate);
 }
 
 /*
  * Walk a search path whose elements are separated by ':' or ';'.  Each
  * element is first looked up with lm_findn(); the callback is invoked on
  * the translation when there is one and on the element itself otherwise.
  * The first non-NULL callback result ends the walk and is returned.
  * Returns NULL for a NULL path or when no callback produced a result.
  */
 static void *
 path_enumerate(const char *path, path_enum_proc callback, void *arg)
 {
     const char *trans;
     size_t len;
     char *res;
 
     if (path == NULL)
 	return (NULL);
 
     for (path += strspn(path, ":;"); *path != '\0';
       path += strspn(path, ":;")) {
 	len = strcspn(path, ":;");
 	trans = lm_findn(NULL, path, len);
 	if (trans == NULL)
 	    res = callback(path, len, arg);
 	else
 	    res = callback(trans, strlen(trans), arg);
 	if (res != NULL)
 	    return (res);
 
 	/* Step over the element; the loop header skips the separators. */
 	path += len;
     }
 
     return (NULL);
 }
 
 /*
  * Argument block handed to try_library_path() through path_enumerate().
  *
  * name:    file name appended to each directory that is tried.
  * namelen: length of "name", without the terminating NUL.
  * buffer:  scratch space in which the candidate pathname is assembled.
  * buflen:  size of "buffer" in bytes.
  */
 struct try_library_args {
     const char	*name;
     size_t	 namelen;
     char	*buffer;
     size_t	 buflen;
 };
 
 /*
  * path_enumerate() callback: check whether the wanted library exists in
  * one directory.
  *
  * dir, dirlen: the directory; only its first "dirlen" bytes are used.
  * param:       a struct try_library_args.
  *
  * Directories that are not absolute are only tried when "trust" is set.
  * Returns a freshly allocated copy of the pathname when the file exists.
  * Returns NULL otherwise, also when the pathname would not fit into the
  * scratch buffer.
  */
 static void *
 try_library_path(const char *dir, size_t dirlen, void *param)
 {
     struct try_library_args *arg = param;
     char *candidate, *result;
     size_t need;
 
     if (*dir != '/' && !trust)
 	return (NULL);
 
     /* Directory, '/', name and the terminating NUL. */
     need = dirlen + 1 + arg->namelen + 1;
     if (need > arg->buflen)
 	return (NULL);
 
     candidate = arg->buffer;
     strncpy(candidate, dir, dirlen);
     candidate[dirlen] = '/';
     strcpy(candidate + dirlen + 1, arg->name);
 
     dbg("  Trying \"%s\"", candidate);
     if (access(candidate, F_OK) != 0)
 	return (NULL);
 
     /* We found it */
     result = xmalloc(need);
     strcpy(result, candidate);
     return (result);
 }
 
 /*
  * Look for the file "name" in the directories listed in "path".
  * Returns the pathname produced by try_library_path() for the first
  * directory that has the file, or NULL when "path" is NULL or nothing
  * was found.
  */
 static char *
 search_library_path(const char *name, const char *path)
 {
     struct try_library_args arg;
     char *found;
 
     if (path == NULL)
 	return NULL;
 
     arg.name = name;
     arg.namelen = strlen(name);
     /* Scratch buffer for candidate pathnames; released before returning. */
     arg.buffer = xmalloc(PATH_MAX);
     arg.buflen = PATH_MAX;
 
     found = path_enumerate(path, try_library_path, &arg);
     free(arg.buffer);
     return (found);
 }
 
 
 /*
  * Finds the library with the given name using the directory descriptors
  * listed in the LD_LIBRARY_PATH_FDS environment variable.
  *
  * name: library name; must not be an absolute path.
  * path: the colon-separated list of directory descriptors.
  * fdp:  on success, receives a freshly-opened close-on-exec file
  *       descriptor for the library; left untouched otherwise.
  *
  * Returns an allocated string of the form "#<dirfd>/<name>" describing
  * where the library was found, or NULL if it cannot be found, if "trust"
  * is not set, if "path" is NULL or if "name" is absolute.
  */
 static char *
 search_library_pathfds(const char *name, const char *path, int *fdp)
 {
 	char *envcopy, *fdstr, *found, *last_token;
 	size_t len;
 	int dirfd, fd;
 
 	dbg("%s('%s', '%s', fdp)", __func__, name, path);
 
 	/* Don't load from user-specified libdirs into setuid binaries. */
 	if (!trust)
 		return (NULL);
 
 	/* We can't do anything if LD_LIBRARY_PATH_FDS isn't set. */
 	if (path == NULL)
 		return (NULL);
 
 	/* LD_LIBRARY_PATH_FDS only works with relative paths. */
 	if (name[0] == '/') {
 		dbg("Absolute path (%s) passed to %s", name, __func__);
 		return (NULL);
 	}
 
 	/*
 	 * Use strtok_r() to walk the FD:FD:FD list.  This requires a local
 	 * copy of the path, as strtok_r rewrites separator tokens
 	 * with '\0'.
 	 */
 	found = NULL;
 	envcopy = xstrdup(path);
 	for (fdstr = strtok_r(envcopy, ":", &last_token); fdstr != NULL;
 	    fdstr = strtok_r(NULL, ":", &last_token)) {
 		dirfd = parse_libdir(fdstr);
 		/* An entry that cannot be parsed ends the whole search. */
 		if (dirfd < 0)
 			break;
 		fd = __sys_openat(dirfd, name, O_RDONLY | O_CLOEXEC | O_VERIFY);
 		if (fd >= 0) {
 			*fdp = fd;
 			/* Room for '#', the descriptor text, '/', name and NUL. */
 			len = strlen(fdstr) + strlen(name) + 3;
 			found = xmalloc(len);
 			if (rtld_snprintf(found, len, "#%d/%s", dirfd, name) < 0) {
 				_rtld_error("error generating '%d/%s'",
 				    dirfd, name);
 				rtld_die();
 			}
 			dbg("open('%s') => %d", found, fd);
 			break;
 		}
 	}
 	free(envcopy);
 
 	return (found);
 }
 
 
 /*
  * Drop one dlopen() reference on the object behind "handle".  When this
  * was the last reference, the fini functions of the DAG are run and the
  * object is unloaded.  Returns 0 on success and -1 when dlcheck()
  * rejects the handle.
  */
 int
 dlclose(void *handle)
 {
     RtldLockState lockstate;
     Obj_Entry *root;
 
     wlock_acquire(rtld_bind_lock, &lockstate);
     if ((root = dlcheck(handle)) == NULL) {
 	lock_release(rtld_bind_lock, &lockstate);
 	return (-1);
     }
     LD_UTRACE(UTRACE_DLCLOSE_START, handle, NULL, 0, root->dl_refcount,
 	root->path);
 
     /* Unreference the object and its dependencies. */
     root->dl_refcount--;
 
     if (root->refcount != 1) {
 	/* Still referenced elsewhere; only the counts go down. */
 	unref_dag(root);
     } else {
 	/*
 	 * This was the last reference.  Call the fini functions first,
 	 * then drop the references and unload what became unreferenced.
 	 */
 	objlist_call_fini(&list_fini, root, &lockstate);
 
 	unref_dag(root);
 
 	GDB_STATE(RT_DELETE,&root->linkmap);
 	unload_object(root);
 	GDB_STATE(RT_CONSISTENT,NULL);
     }
 
     LD_UTRACE(UTRACE_DLCLOSE_STOP, handle, NULL, 0, 0, NULL);
     lock_release(rtld_bind_lock, &lockstate);
     return (0);
 }
 
 /*
  * Return the pending error message, or NULL if there is none.  The
  * pending message is cleared, so a second call in a row returns NULL.
  */
 char *
 dlerror(void)
 {
     char *pending;
 
     pending = error_message;
     error_message = NULL;
     return (pending);
 }
 
 /*
  * This function is deprecated.  The lock callbacks passed in are
  * ignored.  The only thing still done is context bookkeeping: the
  * context from the previous call is destroyed with its own destructor,
  * and the new context and destructor are remembered for the next call.
  */
 void
 dllockinit(void *context,
     void *(*lock_create)(void *context),
     void (*rlock_acquire)(void *lock),
     void (*wlock_acquire)(void *lock),
     void (*lock_release)(void *lock),
     void (*lock_destroy)(void *lock),
     void (*context_destroy)(void *context))
 {
     static void *cur_context;
     static void (*cur_context_destroy)(void *);
     void (*destroy_prev)(void *);
 
     /* Just destroy the context from the previous call, if necessary. */
     destroy_prev = cur_context_destroy;
     if (destroy_prev != NULL)
 	destroy_prev(cur_context);
 
     cur_context = context;
     cur_context_destroy = context_destroy;
 }
 
 /*
  * Open a shared object by name.  Thin wrapper around rtld_dlopen() that
  * passes -1 as the file descriptor.
  */
 void *
 dlopen(const char *name, int mode)
 {
 	void *handle;
 
 	handle = rtld_dlopen(name, -1, mode);
 	return (handle);
 }
 
 /*
  * Open a shared object from an already open file descriptor.  Thin
  * wrapper around rtld_dlopen() that passes NULL as the name.
  */
 void *
 fdlopen(int fd, int mode)
 {
 	void *handle;
 
 	handle = rtld_dlopen(NULL, fd, mode);
 	return (handle);
 }
 
 /*
  * Common implementation of dlopen() and fdlopen().  Translates the
  * caller's RTLD_* mode bits into RTLD_LO_* load flags and hands the
  * request to dlopen_object(), with the main program as the referencing
  * object.  With RTLD_TRACE set, tracing is switched on and "environ" is
  * refreshed from the program's own variable first.
  */
 static void *
 rtld_dlopen(const char *name, int fd, int mode)
 {
     RtldLockState lockstate;
     int lo_flags;
 
     LD_UTRACE(UTRACE_DLOPEN_START, NULL, NULL, 0, mode, name);
     if ((mode & RTLD_TRACE) != 0) {
 	ld_tracing = "1";
 	rlock_acquire(rtld_bind_lock, &lockstate);
 	if (sigsetjmp(lockstate.env, 0) != 0)
 	    lock_upgrade(rtld_bind_lock, &lockstate);
 	environ = (char **)*get_program_var_addr("environ", &lockstate);
 	lock_release(rtld_bind_lock, &lockstate);
     } else {
 	ld_tracing = NULL;
     }
 
     /* Translate the mode bits into load flags. */
     lo_flags = RTLD_LO_DLOPEN;
     if ((mode & RTLD_NODELETE) != 0)
 	lo_flags |= RTLD_LO_NODELETE;
     if ((mode & RTLD_NOLOAD) != 0)
 	lo_flags |= RTLD_LO_NOLOAD;
     if (ld_tracing != NULL)
 	lo_flags |= RTLD_LO_TRACE;
 
     /* Only the binding mode and RTLD_GLOBAL are passed on as "mode". */
     return (dlopen_object(name, fd, obj_main, lo_flags,
       mode & (RTLD_MODEMASK | RTLD_GLOBAL), NULL));
 }
 
 /*
  * Undo the reference taking of a dlopen() that failed part way: give
  * back the dlopen reference and the DAG references, and unload the
  * object if nothing references it any more.
  */
 static void
 dlopen_cleanup(Obj_Entry *obj)
 {
 
 	obj->dl_refcount--;
 	unref_dag(obj);
 	if (obj->refcount != 0)
 		return;
 	unload_object(obj);
 }
 
 /*
  * Worker behind dlopen(): find or load the object identified by NAME/FD,
  * and for a newly loaded object pull in its dependencies, verify symbol
  * versions, relocate the DAG and collect the init functions to run.
  *
  *  name, fd   - object to open; name == NULL together with fd == -1
  *               selects the main program.
  *  refobj     - passed through to load_object().
  *  lo_flags   - RTLD_LO_* flags.  RTLD_LO_EARLY suppresses locking, the
  *               map_stacks_exec() call and the init calls done here.
  *  mode       - dlopen() mode bits (RTLD_MODEMASK part plus RTLD_GLOBAL).
  *  lockstate  - lock state of the caller, or NULL.  When NULL and
  *               RTLD_LO_EARLY is not set, the bind lock is write-acquired
  *               here and released again before returning.
  *
  * Returns the object, or NULL on failure.  When tracing is requested the
  * function prints the loaded objects and calls exit(0) instead of
  * returning.
  */
 static Obj_Entry *
 dlopen_object(const char *name, int fd, Obj_Entry *refobj, int lo_flags,
     int mode, RtldLockState *lockstate)
 {
     Obj_Entry **old_obj_tail;
     Obj_Entry *obj;
     Objlist initlist;
     RtldLockState mlockstate;
     int result;
 
     objlist_init(&initlist);
 
     /* Take the write lock ourselves only if the caller holds no lock. */
     if (lockstate == NULL && !(lo_flags & RTLD_LO_EARLY)) {
 	wlock_acquire(rtld_bind_lock, &mlockstate);
 	lockstate = &mlockstate;
     }
     /* NOTE(review): presumably tells the debugger the map is changing. */
     GDB_STATE(RT_ADD,NULL);
 
     /*
      * Remember the current list tail; if it is non-NULL after loading,
      * a new object was appended to the list.
      */
     old_obj_tail = obj_tail;
     obj = NULL;
     if (name == NULL && fd == -1) {
 	/* dlopen(NULL): hand out the main program with an extra reference. */
 	obj = obj_main;
 	obj->refcount++;
     } else {
 	obj = load_object(name, fd, refobj, lo_flags);
     }
 
     if (obj) {
 	obj->dl_refcount++;
 	/* Add to the global list only once. */
 	if (mode & RTLD_GLOBAL && objlist_find(&list_global, obj) == NULL)
 	    objlist_push_tail(&list_global, obj);
 	if (*old_obj_tail != NULL) {		/* We loaded something new. */
 	    assert(*old_obj_tail == obj);
 	    result = load_needed_objects(obj,
 		lo_flags & (RTLD_LO_DLOPEN | RTLD_LO_EARLY));
 	    /*
 	     * The DAG is initialized and referenced even when loading the
 	     * dependencies failed, so that dlopen_cleanup() below can
 	     * unreference it symmetrically.
 	     */
 	    init_dag(obj);
 	    ref_dag(obj);
 	    if (result != -1)
 		result = rtld_verify_versions(&obj->dagmembers);
 	    if (result != -1 && ld_tracing)
 		goto trace;
 	    /* Relocation is attempted only if everything so far succeeded. */
 	    if (result == -1 || relocate_object_dag(obj,
 	      (mode & RTLD_MODEMASK) == RTLD_NOW, &obj_rtld,
 	      (lo_flags & RTLD_LO_EARLY) ? SYMLOOK_EARLY : 0,
 	      lockstate) == -1) {
 		dlopen_cleanup(obj);
 		obj = NULL;
 	    } else if (lo_flags & RTLD_LO_EARLY) {
 		/*
 		 * Do not call the init functions for early loaded
 		 * filtees.  The image is still not initialized enough
 		 * for them to work.
 		 *
 		 * Our object is found by the global object list and
 		 * will be ordered among all init calls done right
 		 * before transferring control to main.
 		 */
 	    } else {
 		/* Make list of init functions to call. */
 		initlist_add_objects(obj, &obj->next, &initlist);
 	    }
 	    /*
 	     * Process all no_delete or global objects here, given
 	     * them own DAGs to prevent their dependencies from being
 	     * unloaded.  This has to be done after we have loaded all
 	     * of the dependencies, so that we do not miss any.
 	     */
 	    if (obj != NULL)
 		process_z(obj);
 	} else {
 	    /*
 	     * Bump the reference counts for objects on this DAG.  If
 	     * this is the first dlopen() call for the object that was
 	     * already loaded as a dependency, initialize the dag
 	     * starting at it.
 	     */
 	    init_dag(obj);
 	    ref_dag(obj);
 
 	    if ((lo_flags & RTLD_LO_TRACE) != 0)
 		goto trace;
 	}
 	/*
 	 * For a nodelete request (flag or object attribute) take one extra
 	 * DAG reference, once per object; ref_nodel records that it was
 	 * already taken.
 	 */
 	if (obj != NULL && ((lo_flags & RTLD_LO_NODELETE) != 0 ||
 	  obj->z_nodelete) && !obj->ref_nodel) {
 	    dbg("obj %s nodelete", obj->path);
 	    ref_dag(obj);
 	    obj->z_nodelete = obj->ref_nodel = true;
 	}
     }
 
     LD_UTRACE(UTRACE_DLOPEN_STOP, obj, NULL, 0, obj ? obj->dl_refcount : 0,
 	name);
     GDB_STATE(RT_CONSISTENT,obj ? &obj->linkmap : NULL);
 
     if (!(lo_flags & RTLD_LO_EARLY)) {
 	map_stacks_exec(lockstate);
     }
 
     /*
      * Process ifuncs for the objects on the init list; on failure undo
      * the open and report it to the caller.
      */
     if (initlist_objects_ifunc(&initlist, (mode & RTLD_MODEMASK) == RTLD_NOW,
       (lo_flags & RTLD_LO_EARLY) ? SYMLOOK_EARLY : 0,
       lockstate) == -1) {
 	objlist_clear(&initlist);
 	dlopen_cleanup(obj);
 	if (lockstate == &mlockstate)
 	    lock_release(rtld_bind_lock, lockstate);
 	return (NULL);
     }
 
     if (!(lo_flags & RTLD_LO_EARLY)) {
 	/* Call the init functions. */
 	objlist_call_init(&initlist, lockstate);
     }
     objlist_clear(&initlist);
     /* Release the lock only if it was acquired in this function. */
     if (lockstate == &mlockstate)
 	lock_release(rtld_bind_lock, lockstate);
     return obj;
 trace:
     /* Tracing mode: list the loaded objects and terminate the process. */
     trace_loaded_objects(obj);
     if (lockstate == &mlockstate)
 	lock_release(rtld_bind_lock, lockstate);
     exit(0);
 }
 
 /*
  * Common implementation of dlsym(), dlfunc() and dlvsym().
  *
  *  handle   - a handle accepted by dlcheck(), or one of the pseudo
  *             handles NULL, RTLD_NEXT, RTLD_DEFAULT, RTLD_SELF.
  *  name     - symbol name to look up.
  *  retaddr  - return address of the public entry point; used to find
  *             the calling object for the pseudo handles.
  *  ve       - required symbol version, or NULL for an unversioned lookup.
  *  flags    - SYMLOOK_* flags; SYMLOOK_IN_PLT is always added.
  *
  * Returns the address of the symbol (function pointer, resolved ifunc,
  * TLS address or relocated data address), or NULL after recording an
  * error with _rtld_error() or failing dlcheck().  The bind lock is
  * read-acquired for the lookup and released on every return path.
  */
 static void *
 do_dlsym(void *handle, const char *name, void *retaddr, const Ver_Entry *ve,
     int flags)
 {
     DoneList donelist;
     const Obj_Entry *obj, *defobj;
     const Elf_Sym *def;
     SymLook req;
     RtldLockState lockstate;
     tls_index ti;
     void *sym;
     int res;
 
     def = NULL;
     defobj = NULL;
     symlook_init(&req, name);
     req.ventry = ve;
     req.flags = flags | SYMLOOK_IN_PLT;
     req.lockstate = &lockstate;
 
     LD_UTRACE(UTRACE_DLSYM_START, handle, NULL, 0, 0, name);
     rlock_acquire(rtld_bind_lock, &lockstate);
     /*
      * A non-zero return means control came back through lockstate.env;
      * the lock is then upgraded and the lookup below is performed again.
      * NOTE(review): presumably the jump is taken when a nested operation
      * needs the write lock - confirm against the lock implementation.
      */
     if (sigsetjmp(lockstate.env, 0) != 0)
 	    lock_upgrade(rtld_bind_lock, &lockstate);
     if (handle == NULL || handle == RTLD_NEXT ||
 	handle == RTLD_DEFAULT || handle == RTLD_SELF) {
 
 	/* All pseudo handles are relative to the calling object. */
 	if ((obj = obj_from_addr(retaddr)) == NULL) {
 	    _rtld_error("Cannot determine caller's shared object");
 	    lock_release(rtld_bind_lock, &lockstate);
 	    LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name);
 	    return NULL;
 	}
 	if (handle == NULL) {	/* Just the caller's shared object. */
 	    res = symlook_obj(&req, obj);
 	    if (res == 0) {
 		def = req.sym_out;
 		defobj = req.defobj_out;
 	    }
 	} else if (handle == RTLD_NEXT || /* Objects after caller's */
 		   handle == RTLD_SELF) { /* ... caller included */
 	    if (handle == RTLD_NEXT)
 		obj = obj->next;
 	    /*
 	     * Walk the object list; a weak definition is remembered but
 	     * the search continues until a non-weak one is found.
 	     */
 	    for (; obj != NULL; obj = obj->next) {
 		res = symlook_obj(&req, obj);
 		if (res == 0) {
 		    if (def == NULL ||
 		      ELF_ST_BIND(req.sym_out->st_info) != STB_WEAK) {
 			def = req.sym_out;
 			defobj = req.defobj_out;
 			if (ELF_ST_BIND(def->st_info) != STB_WEAK)
 			    break;
 		    }
 		}
 	    }
 	    /*
 	     * Search the dynamic linker itself, and possibly resolve the
 	     * symbol from there.  This is how the application links to
 	     * dynamic linker services such as dlopen.
 	     */
 	    if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) {
 		res = symlook_obj(&req, &obj_rtld);
 		if (res == 0) {
 		    def = req.sym_out;
 		    defobj = req.defobj_out;
 		}
 	    }
 	} else {
 	    assert(handle == RTLD_DEFAULT);
 	    res = symlook_default(&req, obj);
 	    if (res == 0) {
 		defobj = req.defobj_out;
 		def = req.sym_out;
 	    }
 	}
     } else {
 	/* A real handle: validate it first. */
 	if ((obj = dlcheck(handle)) == NULL) {
 	    lock_release(rtld_bind_lock, &lockstate);
 	    LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name);
 	    return NULL;
 	}
 
 	donelist_init(&donelist);
 	if (obj->mainprog) {
             /* Handle obtained by dlopen(NULL, ...) implies global scope. */
 	    res = symlook_global(&req, &donelist);
 	    if (res == 0) {
 		def = req.sym_out;
 		defobj = req.defobj_out;
 	    }
 	    /*
 	     * Search the dynamic linker itself, and possibly resolve the
 	     * symbol from there.  This is how the application links to
 	     * dynamic linker services such as dlopen.
 	     */
 	    if (def == NULL || ELF_ST_BIND(def->st_info) == STB_WEAK) {
 		res = symlook_obj(&req, &obj_rtld);
 		if (res == 0) {
 		    def = req.sym_out;
 		    defobj = req.defobj_out;
 		}
 	    }
 	}
 	else {
 	    /* Search the whole DAG rooted at the given object. */
 	    res = symlook_list(&req, &obj->dagmembers, &donelist);
 	    if (res == 0) {
 		def = req.sym_out;
 		defobj = req.defobj_out;
 	    }
 	}
     }
 
     if (def != NULL) {
 	/* The lock is dropped before the address is computed. */
 	lock_release(rtld_bind_lock, &lockstate);
 
 	/*
 	 * The value required by the caller is derived from the value
 	 * of the symbol. this is simply the relocated value of the
 	 * symbol.
 	 */
 	if (ELF_ST_TYPE(def->st_info) == STT_FUNC)
 	    sym = make_function_pointer(def, defobj);
 	else if (ELF_ST_TYPE(def->st_info) == STT_GNU_IFUNC)
 	    sym = rtld_resolve_ifunc(defobj, def);
 	else if (ELF_ST_TYPE(def->st_info) == STT_TLS) {
 	    /* TLS symbols are resolved through the module/offset pair. */
 	    ti.ti_module = defobj->tlsindex;
 	    ti.ti_offset = def->st_value;
 	    sym = __tls_get_addr(&ti);
 	} else
 	    sym = defobj->relocbase + def->st_value;
 	LD_UTRACE(UTRACE_DLSYM_STOP, handle, sym, 0, 0, name);
 	return (sym);
     }
 
     _rtld_error("Undefined symbol \"%s\"", name);
     lock_release(rtld_bind_lock, &lockstate);
     LD_UTRACE(UTRACE_DLSYM_STOP, handle, NULL, 0, 0, name);
     return NULL;
 }
 
 /*
  * Public dlsym(): unversioned lookup of NAME relative to HANDLE.  The
  * return address is captured in this frame so that do_dlsym() can
  * identify the calling object.
  */
 void *
 dlsym(void *handle, const char *name)
 {
 	void *caller;
 
 	caller = __builtin_return_address(0);
 	return (do_dlsym(handle, name, caller, NULL, SYMLOOK_DLSYM));
 }
 
 /*
  * Public dlfunc(): same lookup as dlsym(), but the result is handed
  * back as a function pointer.  The conversion goes through a union
  * rather than a direct cast between object and function pointers.
  */
 dlfunc_t
 dlfunc(void *handle, const char *name)
 {
 	union {
 		void *data;
 		dlfunc_t func;
 	} conv;
 	void *caller;
 
 	caller = __builtin_return_address(0);
 	conv.data = do_dlsym(handle, name, caller, NULL, SYMLOOK_DLSYM);
 	return (conv.func);
 }
 
 /*
  * Public dlvsym(): like dlsym(), but the lookup carries a version entry
  * built from VERSION (its name and ELF hash).
  */
 void *
 dlvsym(void *handle, const char *name, const char *version)
 {
 	Ver_Entry wanted;
 	void *caller;
 
 	caller = __builtin_return_address(0);
 	wanted.hash = elf_hash(version);
 	wanted.name = version;
 	wanted.file = NULL;
 	wanted.flags = 0;
 	return (do_dlsym(handle, name, caller, &wanted, SYMLOOK_DLSYM));
 }
 
 /*
  * Fill PHDR_INFO with the program header information of the object
  * that contains ADDR.  Returns 1 on success; returns 0 and records an
  * error if no loaded object contains the address.
  */
 int
 _rtld_addr_phdr(const void *addr, struct dl_phdr_info *phdr_info)
 {
     RtldLockState lockstate;
     const Obj_Entry *owner;
     int found;
 
     rlock_acquire(rtld_bind_lock, &lockstate);
     owner = obj_from_addr(addr);
     if (owner != NULL) {
 	rtld_fill_dl_phdr_info(owner, phdr_info);
 	found = 1;
     } else {
 	_rtld_error("No shared object contains address");
 	found = 0;
     }
     lock_release(rtld_bind_lock, &lockstate);
     return (found);
 }
 
 /*
  * Public dladdr(): describe the object containing ADDR and the nearest
  * dynamic symbol at or below it.
  *
  * On success returns 1 with dli_fname/dli_fbase describing the object;
  * dli_sname/dli_saddr describe the closest preceding symbol, or stay
  * NULL if none qualifies.  Returns 0 and records an error when no
  * loaded object contains ADDR.
  */
 int
 dladdr(const void *addr, Dl_info *info)
 {
     RtldLockState lockstate;
     const Obj_Entry *owner;
     const Elf_Sym *sym;
     void *candidate;
     unsigned long idx;
 
     rlock_acquire(rtld_bind_lock, &lockstate);
     owner = obj_from_addr(addr);
     if (owner == NULL) {
         _rtld_error("No shared object contains address");
 	lock_release(rtld_bind_lock, &lockstate);
         return (0);
     }
 
     info->dli_fname = owner->path;
     info->dli_fbase = owner->mapbase;
     info->dli_sname = NULL;
     info->dli_saddr = (void *)0;
 
     /*
      * Linear scan of the dynamic symbol table, keeping the highest
      * symbol address that does not exceed addr.
      */
     for (idx = 0; idx < owner->dynsymcount; idx++) {
 	sym = &owner->symtab[idx];
 
 	/* Undefined and common symbols have no usable address. */
 	if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_COMMON)
 	    continue;
 
 	candidate = owner->relocbase + sym->st_value;
 
 	/* Accept only symbols at or below addr that are not farther away. */
 	if (candidate <= addr && candidate >= info->dli_saddr) {
 	    info->dli_saddr = candidate;
 	    info->dli_sname = owner->strtab + sym->st_name;
 
 	    /* An exact hit cannot be improved upon. */
 	    if (candidate == addr)
 		break;
 	}
     }
     lock_release(rtld_bind_lock, &lockstate);
     return (1);
 }
 
 /*
  * Public dlinfo(): answer REQUEST about the object designated by
  * HANDLE.  A NULL or RTLD_SELF handle designates the calling object.
  *
  *  RTLD_DI_LINKMAP      - store a pointer to the object's link_map in *p.
  *  RTLD_DI_ORIGIN       - write the directory part of the object's path
  *                         into the buffer p.
  *  RTLD_DI_SERINFOSIZE,
  *  RTLD_DI_SERINFO      - delegate to do_search_info().
  *
  * Returns 0 on success and -1 on failure (unknown object, invalid
  * request, or a failing helper).
  */
 int
 dlinfo(void *handle, int request, void *p)
 {
     RtldLockState lockstate;
     const Obj_Entry *target;
     void *caller;
     int rv;
 
     rlock_acquire(rtld_bind_lock, &lockstate);
 
     if (handle != NULL && handle != RTLD_SELF) {
 	target = dlcheck(handle);
     } else {
 	caller = __builtin_return_address(0);	/* __GNUC__ only */
 	target = obj_from_addr(caller);
 	if (target == NULL)
 	    _rtld_error("Cannot determine caller's shared object");
     }
 
     rv = -1;
     if (target != NULL) {
 	rv = 0;
 	switch (request) {
 	case RTLD_DI_LINKMAP:
 	    *((struct link_map const **)p) = &target->linkmap;
 	    break;
 	case RTLD_DI_ORIGIN:
 	    rv = rtld_dirname(target->path, p);
 	    break;
 	case RTLD_DI_SERINFOSIZE:
 	case RTLD_DI_SERINFO:
 	    rv = do_search_info(target, request, (struct dl_serinfo *)p);
 	    break;
 	default:
 	    _rtld_error("Invalid request %d passed to dlinfo()", request);
 	    rv = -1;
 	    break;
 	}
     }
 
     lock_release(rtld_bind_lock, &lockstate);
     return (rv);
 }
 
 /*
  * Populate a dl_phdr_info record from OBJ: load address, path, program
  * headers and their count, TLS module id and init data, plus the
  * global counters of loaded (obj_loads) and since-removed
  * (obj_loads - obj_count) objects.
  */
 static void
 rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info)
 {
 
 	phdr_info->dlpi_name = obj->path;
 	phdr_info->dlpi_addr = (Elf_Addr)obj->relocbase;
 	phdr_info->dlpi_phdr = obj->phdr;
 	phdr_info->dlpi_phnum = obj->phsize / sizeof(*obj->phdr);
 	phdr_info->dlpi_tls_data = obj->tlsinit;
 	phdr_info->dlpi_tls_modid = obj->tlsindex;
 	phdr_info->dlpi_subs = obj_loads - obj_count;
 	phdr_info->dlpi_adds = obj_loads;
 }
 
 /*
  * Public dl_iterate_phdr(): invoke CALLBACK once for every object on
  * the object list and finally for the dynamic linker itself.  The walk
  * stops at the first non-zero callback result, which is then returned;
  * otherwise the result of the last call is returned.
  *
  * The phdr lock is held exclusively and the bind lock shared for the
  * whole walk, including the callbacks.
  */
 int
 dl_iterate_phdr(__dl_iterate_hdr_callback callback, void *param)
 {
     RtldLockState bind_lockstate, phdr_lockstate;
     struct dl_phdr_info record;
     const Obj_Entry *cur;
     int rv;
 
     wlock_acquire(rtld_phdr_lock, &phdr_lockstate);
     rlock_acquire(rtld_bind_lock, &bind_lockstate);
 
     rv = 0;
     cur = obj_list;
     while (cur != NULL) {
 	rtld_fill_dl_phdr_info(cur, &record);
 	rv = callback(&record, sizeof record, param);
 	if (rv != 0)
 	    goto done;
 	cur = cur->next;
     }
 
     /* The dynamic linker is reported last. */
     rtld_fill_dl_phdr_info(&obj_rtld, &record);
     rv = callback(&record, sizeof(record), param);
 
 done:
     lock_release(rtld_bind_lock, &bind_lockstate);
     lock_release(rtld_phdr_lock, &phdr_lockstate);
     return (rv);
 }
 
 /*
  * path_enumerate() callback used by do_search_info().  PARAM points to
  * a struct fill_search_info_args.
  *
  * In RTLD_DI_SERINFOSIZE mode only the entry count and the total size
  * are accumulated.  Otherwise the next dl_serpath slot is filled in and
  * the directory name is copied, NUL-terminated, into the string area.
  * Always returns NULL, so the enumeration is never cut short.
  */
 static void *
 fill_search_info(const char *dir, size_t dirlen, void *param)
 {
     struct fill_search_info_args *ctx = param;
     struct dl_serpath *slot;
 
     if (ctx->request != RTLD_DI_SERINFOSIZE) {
 	slot = ctx->serpath;
 	slot->dls_name = ctx->strspace;
 	slot->dls_flags = ctx->flags;
 
 	strncpy(ctx->strspace, dir, dirlen);
 	ctx->strspace[dirlen] = '\0';
 
 	/* Advance both cursors past what was just written. */
 	ctx->strspace += dirlen + 1;
 	ctx->serpath++;
 	return (NULL);
     }
 
     ctx->serinfo->dls_cnt++;
     ctx->serinfo->dls_size += sizeof(struct dl_serpath) + dirlen + 1;
     return (NULL);
 }
 
 static int
 do_search_info(const Obj_Entry *obj, int request, struct dl_serinfo *info)
 {
     struct dl_serinfo _info;
     struct fill_search_info_args args;
 
     args.request = RTLD_DI_SERINFOSIZE;
     args.serinfo = &_info;
 
     _info.dls_size = __offsetof(struct dl_serinfo, dls_serpath);
     _info.dls_cnt  = 0;
 
     path_enumerate(obj->rpath, fill_search_info, &args);
     path_enumerate(ld_library_path, fill_search_info, &args);
     path_enumerate(obj->runpath, fill_search_info, &args);
     path_enumerate(gethints(obj->z_nodeflib), fill_search_info, &args);
     if (!obj->z_nodeflib)
       path_enumerate(ld_standard_library_path, fill_search_info, &args);
 
 
     if (request == RTLD_DI_SERINFOSIZE) {
 	info->dls_size = _info.dls_size;
 	info->dls_cnt = _info.dls_cnt;
 	return (0);
     }
 
     if (info->dls_cnt != _info.dls_cnt || info->dls_size != _info.dls_size) {
 	_rtld_error("Uninitialized Dl_serinfo struct passed to dlinfo()");
 	return (-1);
     }
 
     args.request  = RTLD_DI_SERINFO;
     args.serinfo  = info;
     args.serpath  = &info->dls_serpath[0];
     args.strspace = (char *)&info->dls_serpath[_info.dls_cnt];
 
     args.flags = LA_SER_RUNPATH;
     if (path_enumerate(obj->rpath, fill_search_info, &args) != NULL)
 	return (-1);
 
     args.flags = LA_SER_LIBPATH;
     if (path_enumerate(ld_library_path, fill_search_info, &args) != NULL)
 	return (-1);
 
     args.flags = LA_SER_RUNPATH;
     if (path_enumerate(obj->runpath, fill_search_info, &args) != NULL)
 	return (-1);
 
     args.flags = LA_SER_CONFIG;
     if (path_enumerate(gethints(obj->z_nodeflib), fill_search_info, &args)
       != NULL)
 	return (-1);
 
     args.flags = LA_SER_DEFAULT;
     if (!obj->z_nodeflib &&
       path_enumerate(ld_standard_library_path, fill_search_info, &args) != NULL)
 	return (-1);
     return (0);
 }
 
 /*
  * Store the directory part of PATH in BNAME, in the manner of
  * dirname(3).  A NULL or empty path, or one without a directory
  * component, yields "."; a path whose only directory is the root
  * yields "/".
  *
  * Returns 0 on success.  Returns -1 and records an error if the result
  * including its terminating NUL would exceed PATH_MAX bytes.
  */
 static int
 rtld_dirname(const char *path, char *bname)
 {
     const char *cur;
 
     /* Nothing to split: the answer is the current directory. */
     if (path == NULL || path[0] == '\0') {
 	bname[0] = '.';
 	bname[1] = '\0';
 	return (0);
     }
 
     /* Step back over any trailing slashes. */
     for (cur = path + strlen(path) - 1; cur > path && *cur == '/'; cur--)
 	continue;
 
     /* Step back over the last path component. */
     for (; cur > path && *cur != '/'; cur--)
 	continue;
 
     /* Reached the first character: root directory or no slash at all. */
     if (cur == path) {
 	if (*cur == '/')
 	    bname[0] = '/';
 	else
 	    bname[0] = '.';
 	bname[1] = '\0';
 	return (0);
     }
 
     /* Drop the separating slash(es); cur ends on the last kept byte. */
     cur--;
     while (cur > path && *cur == '/')
 	cur--;
 
     if (cur - path + 2 > PATH_MAX) {
 	_rtld_error("Filename is too long: %s", path);
 	return (-1);
     }
 
     strncpy(bname, path, cur - path + 1);
     bname[cur - path + 1] = '\0';
     return (0);
 }
 
 /*
  * Resolve PATH with realpath(3) into BASE and then cut off the last
  * component in place, leaving the absolute directory name.  A result
  * directly under the root is left untouched.
  *
  * Returns 0 on success, -1 if realpath() fails or the resolved name
  * contains no slash.
  */
 static int
 rtld_dirname_abs(const char *path, char *base)
 {
 	char *slash;
 
 	if (realpath(path, base) == NULL)
 		return (-1);
 	dbg("%s -> %s", path, base);
 
 	if ((slash = strrchr(base, '/')) == NULL)
 		return (-1);
 	/* Only truncate when the last slash is not the leading one. */
 	if (slash > base)
 		*slash = '\0';
 	return (0);
 }
 
 /*
  * Fill in OBJ's link_map entry and insert it into the r_debug.r_map
  * list.  The entry becomes the list head if the list is empty;
  * otherwise it goes at the end, but ahead of the dynamic linker's own
  * entry if that is present.
  */
 static void
 linkmap_add(Obj_Entry *obj)
 {
     struct link_map *entry, *tail;
 
     entry = &obj->linkmap;
     entry->l_name = obj->path;
     entry->l_addr = obj->mapbase;
     entry->l_ld = obj->dynamic;
 #ifdef __mips__
     /* GDB needs load offset on MIPS to use the symbols */
     entry->l_offs = obj->relocbase;
 #endif
 
     /* First entry: it simply becomes the list. */
     if (r_debug.r_map == NULL) {
 	r_debug.r_map = entry;
 	return;
     }
 
     /*
      * Find the insertion point: the last element, or the one just in
      * front of the dynamic linker's entry, whichever comes first.
      */
     tail = r_debug.r_map;
     while (tail->l_next != NULL && tail->l_next != &obj_rtld.linkmap)
 	tail = tail->l_next;
 
     /* Splice the new entry in after the insertion point. */
     entry->l_next = tail->l_next;
     entry->l_prev = tail;
     if (tail->l_next != NULL)
 	tail->l_next->l_prev = entry;
     tail->l_next = entry;
 }
 
 /*
  * Unlink OBJ's link_map entry from the r_debug.r_map list.  The
  * entry's own l_prev/l_next pointers are left as they were.
  */
 static void
 linkmap_delete(Obj_Entry *obj)
 {
     struct link_map *before, *after;
 
     before = obj->linkmap.l_prev;
     after = obj->linkmap.l_next;
 
     /* Without a predecessor the entry is the head of the list. */
     if (before == NULL)
 	r_debug.r_map = after;
     else
 	before->l_next = after;
 
     if (after != NULL)
 	after->l_prev = before;
 }
 
 /*
  * Function for the debugger to set a breakpoint on to gain control.
  *
  * The two parameters allow the debugger to easily find and determine
  * what the runtime loader is doing and to whom it is doing it.
  *
  * When the loadhook trap is hit (r_debug_state, set at program
  * initialization), the arguments can be found on the stack:
  *
  *  +8   struct link_map *m
  *  +4   struct r_debug  *rd
  *  +0   RetAddr
  *
  * NOTE(review): the offsets above look like a 32-bit stack-based calling
  * convention; they presumably do not hold on other ABIs - confirm.
  *
  * Neither parameter is used by the function body; they exist only to be
  * inspected by the debugger.
  */
 void
 r_debug_state(struct r_debug* rd, struct link_map *m)
 {
     /*
      * The following is a hack to force the compiler to emit calls to
      * this function, even when optimizing.  If the function is empty,
      * the compiler is not obliged to emit any code for calls to it,
      * even when marked __noinline.  However, gdb depends on those
      * calls being made.
      */
     __compiler_membar();
 }
 
 /*
  * A function called after init routines have completed. This can be used to
  * break before a program's entry routine is called, and can be used when
  * main is not available in the symbol table.
  *
  * The parameter M is not used by the body; it is there for a debugger
  * to inspect.
  */
 void
 _r_debug_postinit(struct link_map *m)
 {
 
 	/* See r_debug_state(). */
 	/* The barrier keeps the body from being empty. */
 	__compiler_membar();
 }
 
 /*
  * Look up NAME in the global scope (see symlook_global(), which gives
  * non-weak definitions precedence over weak ones) and return the
  * address the symbol resolves to, typed as a pointer to a pointer
  * variable.  Function and ifunc symbols are resolved through their
  * dedicated helpers.  Returns NULL if the symbol is not found.
  */
 static const void **
 get_program_var_addr(const char *name, RtldLockState *lockstate)
 {
     DoneList visited;
     SymLook req;
     const Elf_Sym *sym;
     const Obj_Entry *defobj;
 
     symlook_init(&req, name);
     req.lockstate = lockstate;
     donelist_init(&visited);
     if (symlook_global(&req, &visited) != 0)
 	return (NULL);
 
     sym = req.sym_out;
     defobj = req.defobj_out;
     switch (ELF_ST_TYPE(sym->st_info)) {
     case STT_FUNC:
 	return ((const void **)make_function_pointer(sym, defobj));
     case STT_GNU_IFUNC:
 	return ((const void **)rtld_resolve_ifunc(defobj, sym));
     default:
 	return ((const void **)(defobj->relocbase + sym->st_value));
     }
 }
 
 /*
  * Store VALUE into the pointer variable called NAME, located through
  * get_program_var_addr().  This is how key variables such as "environ"
  * are set up before any init function runs.  Nothing happens if the
  * variable cannot be found.
  */
 static void
 set_program_var(const char *name, const void *value)
 {
     const void **slot;
 
     slot = get_program_var_addr(name, NULL);
     if (slot == NULL)
 	return;
 
     dbg("\"%s\": *%p <-- %p", name, slot, value);
     *slot = value;
 }
 
 /*
  * Look REQ up in the global scope: first the objects loaded at program
  * start-up (list_main), then every DAG rooted at an RTLD_GLOBAL object
  * (list_global).
  *
  * A result already present in REQ is honoured: the start-up objects
  * are searched only if REQ has no definition yet or only a weak one,
  * and a weak candidate never replaces an existing definition.  The
  * walk over the global DAGs ends as soon as REQ holds a non-weak
  * definition.
  *
  * Returns 0 if REQ holds a symbol on exit, ESRCH otherwise.
  */
 static int
 symlook_global(SymLook *req, DoneList *donelist)
 {
     const Objlist_Entry *entry;
     SymLook probe;
 
     symlook_init_from_req(&probe, req);
 
     /* Objects loaded at program start-up. */
     if (req->defobj_out == NULL ||
       ELF_ST_BIND(req->sym_out->st_info) == STB_WEAK) {
 	if (symlook_list(&probe, &list_main, donelist) == 0) {
 	    if (req->defobj_out == NULL ||
 	      ELF_ST_BIND(probe.sym_out->st_info) != STB_WEAK) {
 		req->sym_out = probe.sym_out;
 		req->defobj_out = probe.defobj_out;
 		assert(req->defobj_out != NULL);
 	    }
 	}
     }
 
     /* DAGs whose roots were opened with RTLD_GLOBAL. */
     STAILQ_FOREACH(entry, &list_global, link) {
 	if (req->defobj_out != NULL &&
 	  ELF_ST_BIND(req->sym_out->st_info) != STB_WEAK)
 	    break;
 	if (symlook_list(&probe, &entry->obj->dagmembers, donelist) != 0)
 	    continue;
 	if (req->defobj_out == NULL ||
 	  ELF_ST_BIND(probe.sym_out->st_info) != STB_WEAK) {
 	    req->sym_out = probe.sym_out;
 	    req->defobj_out = probe.defobj_out;
 	    assert(req->defobj_out != NULL);
 	}
     }
 
     if (req->sym_out != NULL)
 	return (0);
     return (ESRCH);
 }
 
 /*
  * Resolve the symbol described by REQ for a reference made from REFOBJ
  * using the default search order:
  *
  *  1. REFOBJ itself, if it was linked symbolically;
  *  2. the global scope (symlook_global());
  *  3. every dlopened DAG that contains REFOBJ;
  *  4. the dynamic linker itself, if nothing or only a weak definition
  *     was found so far.
  *
  * Returns 0 with the symbol in req->sym_out and its defining object in
  * req->defobj_out, or ESRCH if no definition was found.
  */
 static int
 symlook_default(SymLook *req, const Obj_Entry *refobj)
 {
     DoneList visited;
     SymLook probe;
     const Objlist_Entry *dag;
 
     donelist_init(&visited);
     symlook_init_from_req(&probe, req);
 
     /* Look first in the referencing object if linked symbolically. */
     if (refobj->symbolic && !donelist_check(&visited, refobj)) {
 	if (symlook_obj(&probe, refobj) == 0) {
 	    req->sym_out = probe.sym_out;
 	    req->defobj_out = probe.defobj_out;
 	    assert(req->defobj_out != NULL);
 	}
     }
 
     symlook_global(req, &visited);
 
     /* Search all dlopened DAGs containing the referencing object. */
     STAILQ_FOREACH(dag, &refobj->dldags, link) {
 	if (req->sym_out != NULL &&
 	  ELF_ST_BIND(req->sym_out->st_info) != STB_WEAK)
 	    break;
 	if (symlook_list(&probe, &dag->obj->dagmembers, &visited) != 0)
 	    continue;
 	if (req->sym_out == NULL ||
 	  ELF_ST_BIND(probe.sym_out->st_info) != STB_WEAK) {
 	    req->sym_out = probe.sym_out;
 	    req->defobj_out = probe.defobj_out;
 	    assert(req->defobj_out != NULL);
 	}
     }
 
     /*
      * Fall back to the dynamic linker itself.  This is how the
      * application links to dynamic linker services such as dlopen.
      */
     if (req->sym_out == NULL ||
       ELF_ST_BIND(req->sym_out->st_info) == STB_WEAK) {
 	if (symlook_obj(&probe, &obj_rtld) == 0) {
 	    req->sym_out = probe.sym_out;
 	    req->defobj_out = probe.defobj_out;
 	    assert(req->defobj_out != NULL);
 	}
     }
 
     if (req->sym_out != NULL)
 	return (0);
     return (ESRCH);
 }
 
 /*
  * Search the objects on OBJLIST, in order, for the symbol described by
  * REQ.  Objects already recorded in the done list DLP are skipped.
  *
  * The first definition found is remembered; a later one replaces it
  * only if it is non-weak, and the first non-weak definition ends the
  * search.
  *
  * Returns 0 with req->sym_out/req->defobj_out set, or ESRCH (REQ left
  * untouched) if no object on the list defines the symbol.
  */
 static int
 symlook_list(SymLook *req, const Objlist *objlist, DoneList *dlp)
 {
     const Objlist_Entry *entry;
     const Obj_Entry *bestobj;
     const Elf_Sym *best;
     SymLook probe;
 
     best = NULL;
     bestobj = NULL;
     STAILQ_FOREACH(entry, objlist, link) {
 	if (donelist_check(dlp, entry->obj))
 	    continue;
 	symlook_init_from_req(&probe, req);
 	if (symlook_obj(&probe, entry->obj) != 0)
 	    continue;
 	/* A weak definition cannot displace one we already have. */
 	if (best != NULL &&
 	  ELF_ST_BIND(probe.sym_out->st_info) == STB_WEAK)
 	    continue;
 	best = probe.sym_out;
 	bestobj = probe.defobj_out;
 	if (ELF_ST_BIND(best->st_info) != STB_WEAK)
 	    break;
     }
 
     if (best == NULL)
 	return (ESRCH);
     req->sym_out = best;
     req->defobj_out = bestobj;
     return (0);
 }
 
 /*
  * Search the chain of DAGs pointed to by the given Needed_Entry list
  * for the symbol described by REQ.  Each DAG is scanned completely
  * before advancing to the next one; entries without a loaded object
  * are skipped.
  *
  * The first definition found is remembered; a later one replaces it
  * only if it is non-weak, and the first non-weak definition ends the
  * search.
  *
  * Returns 0 with req->sym_out/req->defobj_out set, or ESRCH if no
  * definition was found.
  */
 static int
 symlook_needed(SymLook *req, const Needed_Entry *needed, DoneList *dlp)
 {
     const Needed_Entry *cur;
     const Obj_Entry *bestobj;
     const Elf_Sym *best;
     SymLook probe;
 
     best = NULL;
     bestobj = NULL;
     symlook_init_from_req(&probe, req);
     for (cur = needed; cur != NULL; cur = cur->next) {
 	if (cur->obj == NULL)
 	    continue;
 	if (symlook_list(&probe, &cur->obj->dagmembers, dlp) != 0)
 	    continue;
 	/* A weak definition cannot displace one we already have. */
 	if (best != NULL &&
 	  ELF_ST_BIND(probe.sym_out->st_info) == STB_WEAK)
 	    continue;
 	best = probe.sym_out;
 	bestobj = probe.defobj_out;
 	if (ELF_ST_BIND(best->st_info) != STB_WEAK)
 	    break;
     }
 
     if (best == NULL)
 	return (ESRCH);
     req->sym_out = best;
     req->defobj_out = bestobj;
     return (0);
 }
 
 /*
  * Search the symbol table of a single shared object for the symbol
  * (and version, if any) described by REQ.  The GNU hash table is used
  * when the object has a valid one, the SysV hash table otherwise.
  *
  * If the object defines the symbol and is a filter, its filtees are
  * loaded and searched:
  *  - for a standard filter the filtee result is final, whether or not
  *    the symbol was found there;
  *  - for an auxiliary filter a filtee definition is preferred, and the
  *    object's own definition is used when the filtees have none.
  *
  * Returns 0 with req->sym_out/req->defobj_out set on success, ESRCH if
  * the symbol was not found, or EINVAL if the object has no valid hash
  * table.
  */
 int
 symlook_obj(SymLook *req, const Obj_Entry *obj)
 {
     DoneList visited;
     SymLook probe;
     int filtee_res, load_flags, own_res;
 
     /* Prefer the faster GNU hash when both kinds are available. */
     if (obj->valid_hash_gnu)
 	own_res = symlook_obj1_gnu(req, obj);
     else if (obj->valid_hash_sysv)
 	own_res = symlook_obj1_sysv(req, obj);
     else
 	return (EINVAL);
 
     /* Filtees are consulted only for symbols the filter itself has. */
     if (own_res != 0)
 	return (own_res);
 
     load_flags = (req->flags & SYMLOOK_EARLY) ? RTLD_LO_EARLY : 0;
 
     if (obj->needed_filtees != NULL) {
 	load_filtees(__DECONST(Obj_Entry *, obj), load_flags,
 	  req->lockstate);
 	donelist_init(&visited);
 	symlook_init_from_req(&probe, req);
 	filtee_res = symlook_needed(&probe, obj->needed_filtees, &visited);
 	if (filtee_res == 0) {
 	    req->sym_out = probe.sym_out;
 	    req->defobj_out = probe.defobj_out;
 	}
 	return (filtee_res);
     }
 
     if (obj->needed_aux_filtees != NULL) {
 	load_filtees(__DECONST(Obj_Entry *, obj), load_flags,
 	  req->lockstate);
 	donelist_init(&visited);
 	symlook_init_from_req(&probe, req);
 	filtee_res = symlook_needed(&probe, obj->needed_aux_filtees,
 	  &visited);
 	if (filtee_res == 0) {
 	    req->sym_out = probe.sym_out;
 	    req->defobj_out = probe.defobj_out;
 	    return (filtee_res);
 	}
     }
 
     return (own_res);
 }
 
 /*
  * Symbol match routine common to both hash functions.
  *
  * Decide whether dynamic symbol number SYMNUM of OBJ satisfies REQ
  * (name, optional version, lookup flags).
  *
  * Returns true and stores the symbol in result->sym_out when the symbol
  * is an acceptable definition.  Returns false otherwise.  For an
  * unversioned request, a versioned and non-hidden symbol is not
  * accepted directly; instead it is counted in result->vcount and the
  * first such symbol is remembered in result->vsymp, so that the caller
  * can fall back to it when it is the only candidate.
  *
  * The error messages report the symbol by its real name (STRP, i.e.
  * strtab + st_name).  SYMNUM is an index into the symbol table and must
  * not be used as an offset into the string table.
  */
 static bool
 matched_symbol(SymLook *req, const Obj_Entry *obj, Sym_Match_Result *result,
     const unsigned long symnum)
 {
 	Elf_Versym verndx;
 	const Elf_Sym *symp;
 	const char *strp;
 
 	symp = obj->symtab + symnum;
 	strp = obj->strtab + symp->st_name;
 
 	/*
 	 * Filter on symbol type.  Non-TLS symbols with a zero value are
 	 * rejected.  An undefined symbol is rejected, except (on non-mips)
 	 * for a function symbol when the lookup is not for a PLT slot.
 	 */
 	switch (ELF_ST_TYPE(symp->st_info)) {
 	case STT_FUNC:
 	case STT_NOTYPE:
 	case STT_OBJECT:
 	case STT_COMMON:
 	case STT_GNU_IFUNC:
 		if (symp->st_value == 0)
 			return (false);
 		/* fallthrough */
 	case STT_TLS:
 		if (symp->st_shndx != SHN_UNDEF)
 			break;
 #ifndef __mips__
 		else if (((req->flags & SYMLOOK_IN_PLT) == 0) &&
 		    (ELF_ST_TYPE(symp->st_info) == STT_FUNC))
 			break;
 		/* fallthrough */
 #endif
 	default:
 		return (false);
 	}
 	/* Cheap first-character test before the full comparison. */
 	if (req->name[0] != strp[0] || strcmp(req->name, strp) != 0)
 		return (false);
 
 	if (req->ventry == NULL) {
 		/* Unversioned request. */
 		if (obj->versyms != NULL) {
 			verndx = VER_NDX(obj->versyms[symnum]);
 			if (verndx > obj->vernum) {
 				_rtld_error(
 				    "%s: symbol %s references wrong version %d",
 				    obj->path, strp, verndx);
 				return (false);
 			}
 			/*
 			 * If we are not called from dlsym (i.e. this
 			 * is a normal relocation from unversioned
 			 * binary), accept the symbol immediately if
 			 * it happens to have first version after this
 			 * shared object became versioned.  Otherwise,
 			 * if symbol is versioned and not hidden,
 			 * remember it. If it is the only symbol with
 			 * this name exported by the shared object, it
 			 * will be returned as a match by the calling
 			 * function. If symbol is global (verndx < 2)
 			 * accept it unconditionally.
 			 */
 			if ((req->flags & SYMLOOK_DLSYM) == 0 &&
 			    verndx == VER_NDX_GIVEN) {
 				result->sym_out = symp;
 				return (true);
 			}
 			else if (verndx >= VER_NDX_GIVEN) {
 				if ((obj->versyms[symnum] & VER_NDX_HIDDEN)
 				    == 0) {
 					if (result->vsymp == NULL)
 						result->vsymp = symp;
 					result->vcount++;
 				}
 				return (false);
 			}
 		}
 		result->sym_out = symp;
 		return (true);
 	}
 	/* Versioned request. */
 	if (obj->versyms == NULL) {
 		/*
 		 * The object carries no version information.  That is an
 		 * error only if the requested version names this object.
 		 */
 		if (object_match_name(obj, req->ventry->name)) {
 			_rtld_error("%s: object %s should provide version %s "
 			    "for symbol %s", obj_rtld.path, obj->path,
 			    req->ventry->name, strp);
 			return (false);
 		}
 	} else {
 		verndx = VER_NDX(obj->versyms[symnum]);
 		if (verndx > obj->vernum) {
 			_rtld_error("%s: symbol %s references wrong version %d",
 			    obj->path, strp, verndx);
 			return (false);
 		}
 		if (obj->vertab[verndx].hash != req->ventry->hash ||
 		    strcmp(obj->vertab[verndx].name, req->ventry->name)) {
 			/*
 			 * Version does not match. Look if this is a
 			 * global symbol and if it is not hidden. If
 			 * global symbol (verndx < 2) is available,
 			 * use it. Do not return symbol if we are
 			 * called by dlvsym, because dlvsym looks for
 			 * a specific version and default one is not
 			 * what dlvsym wants.
 			 */
 			if ((req->flags & SYMLOOK_DLSYM) ||
 			    (verndx >= VER_NDX_GIVEN) ||
 			    (obj->versyms[symnum] & VER_NDX_HIDDEN))
 				return (false);
 		}
 	}
 	result->sym_out = symp;
 	return (true);
 }
 
 /*
  * Search OBJ for the symbol described by REQ using the SysV hash table.
  * obj->buckets is known not to be NULL at this point; the test for this
  * was performed with the obj->valid_hash_sysv assignment.
  *
  * The hash chain is walked until a symbol is accepted by
  * matched_symbol().  If none is, but exactly one versioned candidate
  * was recorded along the way, that candidate is used.  A chain index
  * outside the chain table marks a bad object and ends the search.
  *
  * Returns 0 with req->sym_out/req->defobj_out set, or ESRCH.
  */
 static int
 symlook_obj1_sysv(SymLook *req, const Obj_Entry *obj)
 {
 	Sym_Match_Result matchres;
 	unsigned long symnum;
 
 	matchres.vcount = 0;
 	matchres.vsymp = NULL;
 	matchres.sym_out = NULL;
 
 	symnum = obj->buckets[req->hash % obj->nbuckets];
 	while (symnum != STN_UNDEF) {
 		if (symnum >= obj->nchains)
 			return (ESRCH);	/* Bad object */
 
 		if (matched_symbol(req, obj, &matchres, symnum)) {
 			req->defobj_out = obj;
 			req->sym_out = matchres.sym_out;
 			return (0);
 		}
 		symnum = obj->chains[symnum];
 	}
 
 	/* Fall back to the sole versioned candidate, if there is one. */
 	if (matchres.vcount != 1)
 		return (ESRCH);
 	req->defobj_out = obj;
 	req->sym_out = matchres.vsymp;
 	return (0);
 }
 
 /*
  * Search OBJ for the symbol described by REQ using the GNU hash table.
  *
  * The Bloom filter is consulted first to reject names that are
  * certainly absent.  Then the hash chain of the matching bucket is
  * walked: entries whose hash equals the request's hash (ignoring bit 0)
  * are handed to matched_symbol(), and a set bit 0 marks the last entry
  * of the chain.  If no symbol is accepted but exactly one versioned
  * candidate was recorded, that candidate is used.
  *
  * Returns 0 with req->sym_out/req->defobj_out set, or ESRCH.
  */
 static int
 symlook_obj1_gnu(SymLook *req, const Obj_Entry *obj)
 {
 	Sym_Match_Result matchres;
 	const Elf32_Word *hashval;
 	Elf_Addr bloom_word;
 	Elf32_Word bucket;
 	unsigned long symnum;
 	unsigned int h1, h2;
 
 	matchres.vcount = 0;
 	matchres.vsymp = NULL;
 	matchres.sym_out = NULL;
 
 	/* Select the Bloom filter word and the two bit positions to test. */
 	bloom_word = obj->bloom_gnu[(req->hash_gnu / __ELF_WORD_SIZE) &
 	    obj->maskwords_bm_gnu];
 	h1 = req->hash_gnu & (__ELF_WORD_SIZE - 1);
 	h2 = ((req->hash_gnu >> obj->shift2_gnu) & (__ELF_WORD_SIZE - 1));
 
 	/* Both bits must be set, otherwise the name is definitely absent. */
 	if (((bloom_word >> h1) & 1) == 0 || ((bloom_word >> h2) & 1) == 0)
 		return (ESRCH);
 
 	/* An empty bucket means there is no chain to walk. */
 	bucket = obj->buckets_gnu[req->hash_gnu % obj->nbuckets_gnu];
 	if (bucket == 0)
 		return (ESRCH);
 
 	hashval = &obj->chain_zero_gnu[bucket];
 	for (;;) {
 		if (((*hashval ^ req->hash_gnu) >> 1) == 0) {
 			/* The position in the chain is the symbol index. */
 			symnum = hashval - obj->chain_zero_gnu;
 			if (matched_symbol(req, obj, &matchres, symnum)) {
 				req->defobj_out = obj;
 				req->sym_out = matchres.sym_out;
 				return (0);
 			}
 		}
 		/* Bit 0 flags the end of the chain. */
 		if ((*hashval++ & 1) != 0)
 			break;
 	}
 
 	/* Fall back to the sole versioned candidate, if there is one. */
 	if (matchres.vcount != 1)
 		return (ESRCH);
 	req->defobj_out = obj;
 	req->sym_out = matchres.vsymp;
 	return (0);
 }
 
 /*
  * Print the dependencies of OBJ and of every object after it on the
  * object list, one line per needed entry.
  *
  * The output is driven by environment variables (names built with the
  * _LD() macro):
  *  TRACE_LOADED_OBJECTS_PROGNAME - string printed for %A (default "").
  *  TRACE_LOADED_OBJECTS_FMT1     - format for entries whose name starts
  *                                  with "lib".
  *  TRACE_LOADED_OBJECTS_FMT2     - format for all other entries.
  *  TRACE_LOADED_OBJECTS_ALL      - if set, print a "path:" header for
  *                                  each object that has dependencies and
  *                                  list dependencies already shown.
  *
  * Format directives: %A program name string, %a path of the main
  * object, %o needed name, %p resolved path or "not found", %x mapping
  * base address; any other character after '%' is printed literally.
  * The two-character sequences \n and \t print a newline and a tab; any
  * other character after a backslash is dropped.
  */
 static void
 trace_loaded_objects(Obj_Entry *obj)
 {
     char	*fmt1, *fmt2, *fmt, *main_local, *list_containers;
     int		c;
 
     if ((main_local = getenv(_LD("TRACE_LOADED_OBJECTS_PROGNAME"))) == NULL)
 	main_local = "";
 
     if ((fmt1 = getenv(_LD("TRACE_LOADED_OBJECTS_FMT1"))) == NULL)
 	fmt1 = "\t%o => %p (%x)\n";
 
     if ((fmt2 = getenv(_LD("TRACE_LOADED_OBJECTS_FMT2"))) == NULL)
 	fmt2 = "\t%o (%x)\n";
 
     list_containers = getenv(_LD("TRACE_LOADED_OBJECTS_ALL"));
 
     for (; obj; obj = obj->next) {
 	Needed_Entry		*needed;
 	char			*name, *path;
 	bool			is_lib;
 
 	if (list_containers && obj->needed != NULL)
 	    rtld_printf("%s:\n", obj->path);
 	for (needed = obj->needed; needed; needed = needed->next) {
 	    if (needed->obj != NULL) {
 		/* Show each object once unless everything is requested. */
 		if (needed->obj->traced && !list_containers)
 		    continue;
 		needed->obj->traced = true;
 		path = needed->obj->path;
 	    } else
 		path = "not found";
 
 	    /* needed->name is an offset into this object's string table. */
 	    name = (char *)obj->strtab + needed->name;
 	    is_lib = strncmp(name, "lib", 3) == 0;	/* XXX - bogus */
 
 	    fmt = is_lib ? fmt1 : fmt2;
 	    /*
 	     * Interpret the format.  fmt already points past c here; the
 	     * escape branches look at the following character without
 	     * consuming it, and the ++fmt at the bottom of the loop skips
 	     * it.  Ordinary characters bypass that with "continue".
 	     */
 	    while ((c = *fmt++) != '\0') {
 		switch (c) {
 		default:
 		    rtld_putchar(c);
 		    continue;
 		case '\\':
 		    switch (c = *fmt) {
 		    case '\0':
 			/* Trailing backslash: the loop test ends the scan. */
 			continue;
 		    case 'n':
 			rtld_putchar('\n');
 			break;
 		    case 't':
 			rtld_putchar('\t');
 			break;
 		    }
 		    break;
 		case '%':
 		    switch (c = *fmt) {
 		    case '\0':
 			/* Trailing '%': the loop test ends the scan. */
 			continue;
 		    case '%':
 		    default:
 			rtld_putchar(c);
 			break;
 		    case 'A':
 			rtld_putstr(main_local);
 			break;
 		    case 'a':
 			rtld_putstr(obj_main->path);
 			break;
 		    case 'o':
 			rtld_putstr(name);
 			break;
 #if 0
 		    case 'm':
 			rtld_printf("%d", sodp->sod_major);
 			break;
 		    case 'n':
 			rtld_printf("%d", sodp->sod_minor);
 			break;
 #endif
 		    case 'p':
 			rtld_putstr(path);
 			break;
 		    case 'x':
 			rtld_printf("%p", needed->obj ? needed->obj->mapbase :
 			  0);
 			break;
 		    }
 		    break;
 		}
 		++fmt;
 	    }
 	}
     }
 }
 
 /*
  * Unload a dlopened object and its dependencies from memory and from
  * our data structures.  It is assumed that the DAG rooted in the
  * object has already been unreferenced, and that the object has a
  * reference count of 0.
  *
  * Every object on obj_list after the head entry whose refcount is 0 is
  * unmapped, removed from the list and freed.  obj_tail is then reset to
  * the link field that ends the list.
  */
 static void
 unload_object(Obj_Entry *root)
 {
     Obj_Entry *obj;
     Obj_Entry **linkp;
 
     assert(root->refcount == 0);
 
     /*
      * Pass over the DAG removing unreferenced objects from
      * appropriate lists.
      */
     unlink_object(root);
 
     /* Unmap all objects that are no longer referenced. */
     linkp = &obj_list->next;
     while ((obj = *linkp) != NULL) {
 	if (obj->refcount == 0) {
 	    LD_UTRACE(UTRACE_UNLOAD_OBJECT, obj, obj->mapbase, obj->mapsize, 0,
 		obj->path);
 	    dbg("unloading \"%s\"", obj->path);
 	    /*
 	     * Release the filtees of the object being unloaded.  Passing
 	     * `root' here would touch freed memory once root itself has
 	     * been released by an earlier iteration of this loop, and
 	     * would never release the filtees of the other objects.
 	     */
 	    unload_filtees(obj);
 	    munmap(obj->mapbase, obj->mapsize);
 	    linkmap_delete(obj);
 	    *linkp = obj->next;
 	    obj_count--;
 	    obj_free(obj);
 	} else
 	    linkp = &obj->next;
     }
     obj_tail = linkp;
 }
 
 /*
  * Detach an unreferenced object from the RTLD_GLOBAL list and from the
  * dldags list of every member of its DAG, recursing into the other DAG
  * members.  An object whose refcount is not 0 is left untouched.
  */
 static void
 unlink_object(Obj_Entry *root)
 {
     Objlist_Entry *member;
     Obj_Entry *dep;
 
     /* Still referenced: nothing to unlink. */
     if (root->refcount != 0)
 	return;
 
     /* Remove the object from the RTLD_GLOBAL list. */
     objlist_remove(&list_global, root);
 
     /* Remove the object from all objects' DAG lists. */
     STAILQ_FOREACH(member, &root->dagmembers, link) {
 	dep = member->obj;
 	objlist_remove(&dep->dldags, root);
 	if (dep != root)
 	    unlink_object(dep);
     }
 }
 
 /*
  * Add one reference to every object that is a member of root's DAG.
  * The DAG must already have been initialised.
  */
 static void
 ref_dag(Obj_Entry *root)
 {
     Objlist_Entry *member;
 
     assert(root->dag_inited);
     STAILQ_FOREACH(member, &root->dagmembers, link) {
 	member->obj->refcount += 1;
     }
 }
 
 /*
  * Drop one reference from every object that is a member of root's DAG.
  * The DAG must already have been initialised.
  */
 static void
 unref_dag(Obj_Entry *root)
 {
     Objlist_Entry *member;
 
     assert(root->dag_inited);
     STAILQ_FOREACH(member, &root->dagmembers, link) {
 	member->obj->refcount -= 1;
     }
 }
 
 /*
  * Common code for MD __tls_get_addr().
  */
 static void *tls_get_addr_slow(Elf_Addr **, int, size_t) __noinline;
 /*
  * Out-of-line path of tls_get_addr_common().
  *
  * dtvp:   location of the caller's DTV pointer; rewritten if the DTV is
  *         reallocated here.
  * index:  TLS module index; the module's block address is kept in
  *         dtv[index + 1].
  * offset: byte offset added to the module's block address.
  *
  * dtv[0] holds the generation the DTV was built for and dtv[1] the number
  * of module slots that follow.  Returns the block address plus offset.
  */
 static void *
 tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset)
 {
     Elf_Addr *newdtv, *dtv;
     RtldLockState lockstate;
     int to_copy;
 
     dtv = *dtvp;
     /* Check dtv generation in case new modules have arrived */
     if (dtv[0] != tls_dtv_generation) {
 	wlock_acquire(rtld_bind_lock, &lockstate);
 	/* Two header words (generation, size) precede the module slots. */
 	newdtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr));
 	/* Carry over at most as many slots as the new DTV can hold. */
 	to_copy = dtv[1];
 	if (to_copy > tls_max_index)
 	    to_copy = tls_max_index;
 	memcpy(&newdtv[2], &dtv[2], to_copy * sizeof(Elf_Addr));
 	newdtv[0] = tls_dtv_generation;
 	newdtv[1] = tls_max_index;
 	free(dtv);
 	lock_release(rtld_bind_lock, &lockstate);
 	dtv = *dtvp = newdtv;
     }
 
     /* Dynamically allocate module TLS if necessary */
     if (dtv[index + 1] == 0) {
 	/* Signal safe, wlock will block out signals. */
 	wlock_acquire(rtld_bind_lock, &lockstate);
 	/* Re-check under the lock before allocating. */
 	if (!dtv[index + 1])
 	    dtv[index + 1] = (Elf_Addr)allocate_module_tls(index);
 	lock_release(rtld_bind_lock, &lockstate);
     }
     return ((void *)(dtv[index + 1] + offset));
 }
 
 /*
  * Fast path for TLS address lookup: when the thread's DTV is current and
  * the module's block already exists, return its address plus offset
  * directly; otherwise defer to tls_get_addr_slow().
  */
 void *
 tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset)
 {
 	Elf_Addr *cur;
 
 	cur = *dtvp;
 	/* Stale generation or missing block: take the slow path. */
 	if (!__predict_true(cur[0] == tls_dtv_generation &&
 	    cur[index + 1] != 0))
 		return (tls_get_addr_slow(dtvp, index, offset));
 	return ((void *)(cur[index + 1] + offset));
 }
 
 #if defined(__aarch64__) || defined(__arm__) || defined(__mips__) || \
     defined(__powerpc__) || defined(__riscv__)
 
 /*
  * Allocate Static TLS using the Variant I method.
  *
  * objs:     head of the object list whose static TLS images are copied
  *           into a freshly created block.
  * oldtcb:   existing block to migrate from, or NULL.  It is returned
  *           unchanged when tcbsize equals TLS_TCB_SIZE; otherwise it is
  *           copied into the new allocation and freed.
  * tcbsize:  requested TCB size; must be at least TLS_TCB_SIZE.
  * tcbalign: not referenced by this variant.
  *
  * Returns the start of the allocation.  The DTV pointer is stored at
  * tcb + tcbsize - TLS_TCB_SIZE.
  */
 void *
 allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign)
 {
     Obj_Entry *obj;
     char *tcb;
     Elf_Addr **tls;
     Elf_Addr *dtv;
     Elf_Addr addr;
     int i;
 
     if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE)
 	return (oldtcb);
 
     assert(tcbsize >= TLS_TCB_SIZE);
     tcb = xcalloc(1, tls_static_space - TLS_TCB_SIZE + tcbsize);
     tls = (Elf_Addr **)(tcb + tcbsize - TLS_TCB_SIZE);
 
     if (oldtcb != NULL) {
 	memcpy(tls, oldtcb, tls_static_space);
 	free(oldtcb);
 
 	/* Adjust the DTV. */
 	dtv = tls[0];
 	for (i = 0; i < dtv[1]; i++) {
 	    /*
 	     * Slots pointing into the old static block are rebased onto
 	     * the new one; oldtcb is only used as an address here.
 	     */
 	    if (dtv[i+2] >= (Elf_Addr)oldtcb &&
 		dtv[i+2] < (Elf_Addr)oldtcb + tls_static_space) {
 		dtv[i+2] = dtv[i+2] - (Elf_Addr)oldtcb + (Elf_Addr)tls;
 	    }
 	}
     } else {
 	/* Fresh DTV: generation, slot count, then one slot per module. */
 	dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr));
 	tls[0] = dtv;
 	dtv[0] = tls_dtv_generation;
 	dtv[1] = tls_max_index;
 
 	for (obj = objs; obj; obj = obj->next) {
 	    if (obj->tlsoffset > 0) {
 		addr = (Elf_Addr)tls + obj->tlsoffset;
 		/* Copy the initialised image, then zero the remainder. */
 		if (obj->tlsinitsize > 0)
 		    memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize);
 		if (obj->tlssize > obj->tlsinitsize)
 		    memset((void*) (addr + obj->tlsinitsize), 0,
 			   obj->tlssize - obj->tlsinitsize);
 		dtv[obj->tlsindex + 1] = addr;
 	    }
 	}
     }
 
     return (tcb);
 }
 
 /*
  * Release a Variant I TLS block: free every DTV slot that points outside
  * the static TLS area, then the DTV itself and the TCB allocation.
  */
 void
 free_tls(void *tcb, size_t tcbsize, size_t tcbalign)
 {
     Elf_Addr *dtv;
     Elf_Addr block_lo, block_hi, slot;
     int nslots, n;
 
     assert(tcbsize >= TLS_TCB_SIZE);
 
     /* Bounds of the static TLS area inside the allocation. */
     block_lo = (Elf_Addr)tcb + tcbsize - TLS_TCB_SIZE;
     block_hi = block_lo + tls_static_space;
 
     dtv = *(Elf_Addr **)block_lo;
     nslots = dtv[1];
     for (n = 0; n < nslots; n++) {
 	slot = dtv[n + 2];
 	if (slot == 0)
 	    continue;
 	/* Static blocks go away together with the TCB below. */
 	if (slot >= block_lo && slot < block_hi)
 	    continue;
 	free((void*)slot);
     }
     free(dtv);
     free(tcb);
 }
 
 #endif
 
 #if defined(__i386__) || defined(__amd64__) || defined(__sparc64__)
 
 /*
  * Allocate Static TLS using the Variant II method.
  *
  * objs:     head of the object list whose static TLS images are copied
  *           into the new block when oldtls is NULL.
  * oldtls:   segment base of an existing block to migrate from, or NULL.
  * tcbsize:  requested TCB size; must hold at least two Elf_Addr words.
  * tcbalign: requested alignment; the larger of this and
  *           tls_static_max_align is used.
  *
  * Returns the segment base, which lies round(tls_static_space, ralign)
  * bytes into the allocation.  Word 0 at the segment base points to the
  * segment base itself and word 1 to the DTV.
  */
 void *
 allocate_tls(Obj_Entry *objs, void *oldtls, size_t tcbsize, size_t tcbalign)
 {
     Obj_Entry *obj;
     size_t size, ralign;
     char *tls;
     Elf_Addr *dtv, *olddtv;
     Elf_Addr segbase, oldsegbase, addr;
     int i;
 
     ralign = tcbalign;
     if (tls_static_max_align > ralign)
 	    ralign = tls_static_max_align;
     size = round(tls_static_space, ralign) + round(tcbsize, ralign);
 
     assert(tcbsize >= 2*sizeof(Elf_Addr));
     tls = malloc_aligned(size, ralign);
     dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr));
 
     segbase = (Elf_Addr)(tls + round(tls_static_space, ralign));
     ((Elf_Addr*)segbase)[0] = segbase;
     ((Elf_Addr*)segbase)[1] = (Elf_Addr) dtv;
 
     dtv[0] = tls_dtv_generation;
     dtv[1] = tls_max_index;
 
     if (oldtls) {
 	/*
 	 * Copy the static TLS block over whole.
 	 */
 	oldsegbase = (Elf_Addr) oldtls;
 	memcpy((void *)(segbase - tls_static_space),
 	       (const void *)(oldsegbase - tls_static_space),
 	       tls_static_space);
 
 	/*
 	 * If any dynamic TLS blocks have been created by tls_get_addr(),
 	 * move them over.  The old slot is cleared so that free_tls()
 	 * below does not release a block that now belongs to the new DTV.
 	 */
 	olddtv = ((Elf_Addr**)oldsegbase)[1];
 	for (i = 0; i < olddtv[1]; i++) {
 	    if (olddtv[i+2] < oldsegbase - size || olddtv[i+2] > oldsegbase) {
 		dtv[i+2] = olddtv[i+2];
 		olddtv[i+2] = 0;
 	    }
 	}
 
 	/*
 	 * We assume that this block was the one we created with
 	 * allocate_initial_tls().
 	 */
 	free_tls(oldtls, 2*sizeof(Elf_Addr), sizeof(Elf_Addr));
     } else {
 	for (obj = objs; obj; obj = obj->next) {
 	    if (obj->tlsoffset) {
 		/* Variant II places module blocks below the segment base. */
 		addr = segbase - obj->tlsoffset;
 		memset((void*) (addr + obj->tlsinitsize),
 		       0, obj->tlssize - obj->tlsinitsize);
 		if (obj->tlsinit)
 		    memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize);
 		dtv[obj->tlsindex + 1] = addr;
 	    }
 	}
     }
 
     return (void*) segbase;
 }
 
 /*
  * Release a Variant II TLS block.  `tls' is the segment base; the static
  * area occupies the round(tls_static_space, ralign) bytes below it.
  */
 void
 free_tls(void *tls, size_t tcbsize, size_t tcbalign)
 {
     Elf_Addr *dtv;
     Elf_Addr area_lo, area_hi, slot;
     size_t ralign, static_size;
     int nslots, n;
 
     /*
      * Figure out the size of the initial TLS block so that we can
      * find stuff which ___tls_get_addr() allocated dynamically.
      */
     ralign = (tls_static_max_align > tcbalign) ?
 	tls_static_max_align : tcbalign;
     static_size = round(tls_static_space, ralign);
 
     dtv = ((Elf_Addr**)tls)[1];
     nslots = dtv[1];
     area_hi = (Elf_Addr) tls;
     area_lo = area_hi - static_size;
 
     for (n = 0; n < nslots; n++) {
 	slot = dtv[n + 2];
 	if (slot == 0)
 	    continue;
 	/* Slots inside the static area are released with it below. */
 	if (slot >= area_lo && slot <= area_hi)
 	    continue;
 	free_aligned((void *)slot);
     }
 
     free_aligned((void *)area_lo);
     free((void*) dtv);
 }
 
 #endif
 
 /*
  * Allocate TLS block for module with given index.
  *
  * The block is aligned to the module's tlsalign, starts with a copy of
  * the module's initialisation image and is zero-filled up to tlssize.
  * An unknown index is reported and passed to rtld_die().
  */
 void *
 allocate_module_tls(int index)
 {
     Obj_Entry *mod;
     char *block;
 
     /* Walk the object list until the owner of this index is found. */
     mod = obj_list;
     while (mod != NULL && mod->tlsindex != index)
 	mod = mod->next;
 
     if (mod == NULL) {
 	_rtld_error("Can't find module with TLS index %d", index);
 	rtld_die();
     }
 
     block = malloc_aligned(mod->tlssize, mod->tlsalign);
     memcpy(block, mod->tlsinit, mod->tlsinitsize);
     memset(block + mod->tlsinitsize, 0, mod->tlssize - mod->tlsinitsize);
 
     return block;
 }
 
 /*
  * Assign an offset in the static TLS block to `obj'.
  *
  * Returns true if the object already has an offset, needs no TLS, or was
  * given one here; returns false when the static TLS size has already been
  * fixed and the object does not fit.  On success tls_last_offset and
  * tls_last_size are updated to describe this object.
  */
 bool
 allocate_tls_offset(Obj_Entry *obj)
 {
     size_t off;
 
     if (obj->tls_done)
 	return true;
 
     if (obj->tlssize == 0) {
 	obj->tls_done = true;
 	return true;
     }
 
     /* The first allocation uses a different placement computation. */
     if (tls_last_offset == 0)
 	off = calculate_first_tls_offset(obj->tlssize, obj->tlsalign);
     else
 	off = calculate_tls_offset(tls_last_offset, tls_last_size,
 				   obj->tlssize, obj->tlsalign);
 
     /*
      * If we have already fixed the size of the static TLS block, we
      * must stay within that size. When allocating the static TLS, we
      * leave a small amount of space spare to be used for dynamically
      * loading modules which use static TLS.
      */
     if (tls_static_space != 0) {
 	if (calculate_tls_end(off, obj->tlssize) > tls_static_space)
 	    return false;
     } else if (obj->tlsalign > tls_static_max_align) {
 	    /* Size not fixed yet: keep track of the strictest alignment. */
 	    tls_static_max_align = obj->tlsalign;
     }
 
     tls_last_offset = obj->tlsoffset = off;
     tls_last_size = obj->tlssize;
     obj->tls_done = true;
 
     return true;
 }
 
 /*
  * Give back the static TLS space of `obj', but only when it was the most
  * recent allocation.
  */
 void
 free_tls_offset(Obj_Entry *obj)
 {
 
     /*
      * If we were the last thing to allocate out of the static TLS
      * block, we give our space back to the 'allocator'. This is a
      * simplistic workaround to allow libGL.so.1 to be loaded and
      * unloaded multiple times.
      */
     if (calculate_tls_end(obj->tlsoffset, obj->tlssize)
 	!= calculate_tls_end(tls_last_offset, tls_last_size))
 	return;
 
     tls_last_size = 0;
     tls_last_offset -= obj->tlssize;
 }
 
 /*
  * Locked wrapper around allocate_tls() for the global object list: the
  * bind lock is held for writing during the allocation.
  */
 void *
 _rtld_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign)
 {
     RtldLockState ls;
     void *newtls;
 
     wlock_acquire(rtld_bind_lock, &ls);
     newtls = allocate_tls(obj_list, oldtls, tcbsize, tcbalign);
     lock_release(rtld_bind_lock, &ls);
 
     return (newtls);
 }
 
 /*
  * Locked wrapper around free_tls(): the bind lock is held for writing
  * while the block is released.
  */
 void
 _rtld_free_tls(void *tcb, size_t tcbsize, size_t tcbalign)
 {
     RtldLockState ls;
 
     wlock_acquire(rtld_bind_lock, &ls);
     free_tls(tcb, tcbsize, tcbalign);
     lock_release(rtld_bind_lock, &ls);
 }
 
 /*
  * Append a copy of `name' to the object's list of names.  If the
  * allocation fails the name is not recorded and nothing is reported.
  */
 static void
 object_add_name(Obj_Entry *obj, const char *name)
 {
     Name_Entry *ent;
 
     ent = malloc(sizeof(Name_Entry) + strlen(name));
     if (ent == NULL)
 	return;
 
     strcpy(ent->name, name);
     STAILQ_INSERT_TAIL(&obj->names, ent, link);
 }
 
 /*
  * Return 1 if `name' is equal to one of the names recorded for the
  * object, 0 otherwise.
  */
 static int
 object_match_name(const Obj_Entry *obj, const char *name)
 {
     Name_Entry *ent;
     int found;
 
     found = 0;
     STAILQ_FOREACH(ent, &obj->names, link) {
 	if (strcmp(name, ent->name) != 0)
 	    continue;
 	found = 1;
 	break;
     }
     return (found);
 }
 
 /*
  * Find the object that satisfies the dependency `name' of `obj'.
  *
  * Objects on list_main are tried first, by recorded name.  Then obj's own
  * needed list is searched, comparing both the DT_NEEDED string and the
  * names of any already loaded object.  A match in the needed list may
  * return NULL when that dependency has not been loaded yet.  If nothing
  * matches, an error is recorded and rtld_die() is called; no value is
  * returned after it, so it presumably does not return.
  */
 static Obj_Entry *
 locate_dependency(const Obj_Entry *obj, const char *name)
 {
     const Objlist_Entry *entry;
     const Needed_Entry *needed;
 
     STAILQ_FOREACH(entry, &list_main, link) {
 	if (object_match_name(entry->obj, name))
 	    return entry->obj;
     }
 
     for (needed = obj->needed;  needed != NULL;  needed = needed->next) {
 	if (strcmp(obj->strtab + needed->name, name) == 0 ||
 	  (needed->obj != NULL && object_match_name(needed->obj, name))) {
 	    /*
 	     * If there is DT_NEEDED for the name we are looking for,
 	     * we are all set.  Note that object might not be found if
 	     * dependency was not loaded yet, so the function can
 	     * return NULL here.  This is expected and handled
 	     * properly by the caller.
 	     */
 	    return (needed->obj);
 	}
     }
     _rtld_error("%s: Unexpected inconsistency: dependency %s not found",
 	obj->path, name);
     rtld_die();
 }
 
 /*
  * Check that `depobj' defines the version that `refobj' requires through
  * the Vernaux record `vna'.
  *
  * Returns 0 when a definition with the same hash and name is found, or
  * when none is found but the requirement is flagged VER_FLG_WEAK.
  * Returns -1, with an error message recorded, when depobj has no version
  * definitions, when a definition record has an unsupported vd_version, or
  * when a non-weak requirement is not satisfied.
  */
 static int
 check_object_provided_version(Obj_Entry *refobj, const Obj_Entry *depobj,
     const Elf_Vernaux *vna)
 {
     const Elf_Verdef *vd;
     const char *vername;
 
     vername = refobj->strtab + vna->vna_name;
     vd = depobj->verdef;
     if (vd == NULL) {
 	_rtld_error("%s: version %s required by %s not defined",
 	    depobj->path, vername, refobj->path);
 	return (-1);
     }
     /* Walk the Verdef records; vd_next == 0 marks the last one. */
     for (;;) {
 	if (vd->vd_version != VER_DEF_CURRENT) {
 	    _rtld_error("%s: Unsupported version %d of Elf_Verdef entry",
 		depobj->path, vd->vd_version);
 	    return (-1);
 	}
 	/* Compare hashes first; compare names only when they agree. */
 	if (vna->vna_hash == vd->vd_hash) {
 	    const Elf_Verdaux *aux = (const Elf_Verdaux *)
 		((char *)vd + vd->vd_aux);
 	    if (strcmp(vername, depobj->strtab + aux->vda_name) == 0)
 		return (0);
 	}
 	if (vd->vd_next == 0)
 	    break;
 	vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next);
     }
     if (vna->vna_flags & VER_FLG_WEAK)
 	return (0);
     _rtld_error("%s: version %s required by %s not found",
 	depobj->path, vername, refobj->path);
     return (-1);
 }
 
 /*
  * Build the version table (obj->vertab) of an object and verify that the
  * versions it requires are provided by its dependencies.
  *
  * The work is done in two passes over the Verneed and Verdef records: the
  * first finds the highest version index in use and validates the record
  * versions, the second fills vertab, which is indexed by version index.
  * An object is processed at most once (obj->ver_checked).
  *
  * Returns 0 on success, including when the object was already checked or
  * uses no version indexes.  Returns -1 when a record has an unsupported
  * version, a dependency cannot be located, or a required version is
  * missing.
  */
 static int
 rtld_verify_object_versions(Obj_Entry *obj)
 {
     const Elf_Verneed *vn;
     const Elf_Verdef  *vd;
     const Elf_Verdaux *vda;
     const Elf_Vernaux *vna;
     const Obj_Entry *depobj;
     int maxvernum, vernum;
 
     if (obj->ver_checked)
 	return (0);
     obj->ver_checked = true;
 
     maxvernum = 0;
     /*
      * Walk over defined and required version records and figure out
      * max index used by any of them. Do very basic sanity checking
      * while there.
      */
     vn = obj->verneed;
     while (vn != NULL) {
 	if (vn->vn_version != VER_NEED_CURRENT) {
 	    _rtld_error("%s: Unsupported version %d of Elf_Verneed entry",
 		obj->path, vn->vn_version);
 	    return (-1);
 	}
 	/* Records are chained by byte offsets; 0 ends the chain. */
 	vna = (const Elf_Vernaux *) ((char *)vn + vn->vn_aux);
 	for (;;) {
 	    vernum = VER_NEED_IDX(vna->vna_other);
 	    if (vernum > maxvernum)
 		maxvernum = vernum;
 	    if (vna->vna_next == 0)
 		 break;
 	    vna = (const Elf_Vernaux *) ((char *)vna + vna->vna_next);
 	}
 	if (vn->vn_next == 0)
 	    break;
 	vn = (const Elf_Verneed *) ((char *)vn + vn->vn_next);
     }
 
     vd = obj->verdef;
     while (vd != NULL) {
 	if (vd->vd_version != VER_DEF_CURRENT) {
 	    _rtld_error("%s: Unsupported version %d of Elf_Verdef entry",
 		obj->path, vd->vd_version);
 	    return (-1);
 	}
 	vernum = VER_DEF_IDX(vd->vd_ndx);
 	if (vernum > maxvernum)
 		maxvernum = vernum;
 	if (vd->vd_next == 0)
 	    break;
 	vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next);
     }
 
     /* No version index in use: there is nothing to record. */
     if (maxvernum == 0)
 	return (0);
 
     /*
      * Store version information in array indexable by version index.
      * Verify that object version requirements are satisfied along the
      * way.
      */
     obj->vernum = maxvernum + 1;
     obj->vertab = xcalloc(obj->vernum, sizeof(Ver_Entry));
 
     vd = obj->verdef;
     while (vd != NULL) {
 	/* Definitions flagged VER_FLG_BASE get no table entry. */
 	if ((vd->vd_flags & VER_FLG_BASE) == 0) {
 	    vernum = VER_DEF_IDX(vd->vd_ndx);
 	    assert(vernum <= maxvernum);
 	    vda = (const Elf_Verdaux *)((char *)vd + vd->vd_aux);
 	    obj->vertab[vernum].hash = vd->vd_hash;
 	    obj->vertab[vernum].name = obj->strtab + vda->vda_name;
 	    obj->vertab[vernum].file = NULL;
 	    obj->vertab[vernum].flags = 0;
 	}
 	if (vd->vd_next == 0)
 	    break;
 	vd = (const Elf_Verdef *) ((char *)vd + vd->vd_next);
     }
 
     vn = obj->verneed;
     while (vn != NULL) {
 	depobj = locate_dependency(obj, obj->strtab + vn->vn_file);
 	if (depobj == NULL)
 	    return (-1);
 	vna = (const Elf_Vernaux *) ((char *)vn + vn->vn_aux);
 	for (;;) {
 	    if (check_object_provided_version(obj, depobj, vna))
 		return (-1);
 	    vernum = VER_NEED_IDX(vna->vna_other);
 	    assert(vernum <= maxvernum);
 	    obj->vertab[vernum].hash = vna->vna_hash;
 	    obj->vertab[vernum].name = obj->strtab + vna->vna_name;
 	    obj->vertab[vernum].file = obj->strtab + vn->vn_file;
 	    obj->vertab[vernum].flags = (vna->vna_other & VER_NEED_HIDDEN) ?
 		VER_INFO_HIDDEN : 0;
 	    if (vna->vna_next == 0)
 		 break;
 	    vna = (const Elf_Vernaux *) ((char *)vna + vna->vna_next);
 	}
 	if (vn->vn_next == 0)
 	    break;
 	vn = (const Elf_Verneed *) ((char *)vn + vn->vn_next);
     }
     return 0;
 }
 
 static int
 rtld_verify_versions(const Objlist *objlist)
 {
     Objlist_Entry *entry;
     int rc;
 
     rc = 0;
     STAILQ_FOREACH(entry, objlist, link) {
 	/*
 	 * Skip dummy objects or objects that have their version requirements
 	 * already checked.
 	 */
 	if (entry->obj->strtab == NULL || entry->obj->vertab != NULL)
 	    continue;
 	if (rtld_verify_object_versions(entry->obj) == -1) {
 	    rc = -1;
 	    if (ld_tracing == NULL)
 		break;
 	}
     }
     if (rc == 0 || ld_tracing != NULL)
     	rc = rtld_verify_object_versions(&obj_rtld);
     return rc;
 }
 
 /*
  * Return the version table entry that applies to symbol number `symnum'
  * of `obj', or NULL when the object has no version table, the symbol's
  * version index is out of range (an error is recorded), or the entry has
  * a zero hash.
  */
 const Ver_Entry *
 fetch_ventry(const Obj_Entry *obj, unsigned long symnum)
 {
     Elf_Versym idx;
 
     if (!obj->vertab)
 	return NULL;
 
     idx = VER_NDX(obj->versyms[symnum]);
     if (idx >= obj->vernum) {
 	/*
 	 * NOTE(review): symnum is used here as a string table offset;
 	 * confirm whether the symbol's own name offset was intended.
 	 */
 	_rtld_error("%s: symbol %s has wrong verneed value %d",
 	    obj->path, obj->strtab + symnum, idx);
 	return NULL;
     }
     if (obj->vertab[idx].hash != 0)
 	return &obj->vertab[idx];
     return NULL;
 }
 
 /*
  * Return the current value of stack_prot, the protection bits rtld keeps
  * for stacks (map_stacks_exec() may add PROT_EXEC to it).
  */
 int
 _rtld_get_stack_prot(void)
 {
 
 	return (stack_prot);
 }
 
 /*
  * Report whether the object identified by `arg' was dlopened.  `arg' is
  * tried first as a handle (dlcheck) and then as an address inside an
  * object.  Returns 1 or 0, or -1 with an error recorded when no object
  * matches.  The bind lock is held for reading throughout.
  */
 int
 _rtld_is_dlopened(void *arg)
 {
 	RtldLockState ls;
 	Obj_Entry *found;
 	int answer;
 
 	rlock_acquire(rtld_bind_lock, &ls);
 	found = dlcheck(arg);
 	if (found == NULL)
 		found = obj_from_addr(arg);
 
 	if (found != NULL) {
 		answer = found->dlopened ? 1 : 0;
 	} else {
 		_rtld_error("No shared object contains address");
 		answer = -1;
 	}
 	lock_release(rtld_bind_lock, &ls);
 	return (answer);
 }
 
 /*
  * If an executable stack is wanted (PF_X set in max_stack_flags) and
  * stack_prot does not yet include PROT_EXEC, look up the program's
  * __pthread_map_stacks_exec hook.  When the hook exists, PROT_EXEC is
  * added to stack_prot and the hook is called.
  */
 static void
 map_stacks_exec(RtldLockState *lockstate)
 {
 	void (*hook)(void);
 
 	if ((max_stack_flags & PF_X) == 0)
 		return;
 	if ((stack_prot & PROT_EXEC) != 0)
 		return;
 
 	hook = (void (*)(void))(uintptr_t)
 	    get_program_var_addr("__pthread_map_stacks_exec", lockstate);
 	if (hook == NULL)
 		return;
 
 	stack_prot |= PROT_EXEC;
 	hook();
 }
 
 /*
  * Prepare a lookup request for `name': every field is cleared, then the
  * name and both of its hash values (ELF and GNU) are filled in.
  */
 void
 symlook_init(SymLook *dst, const char *name)
 {
 
 	memset(dst, 0, sizeof(*dst));
 	dst->hash_gnu = gnu_hash(name);
 	dst->hash = elf_hash(name);
 	dst->name = name;
 }
 
 /*
  * Start a new lookup request from an existing one: the query fields are
  * copied from `src' and the two result fields are reset to NULL.
  */
 static void
 symlook_init_from_req(SymLook *dst, const SymLook *src)
 {
 
 	/* Results start out empty. */
 	dst->sym_out = NULL;
 	dst->defobj_out = NULL;
 
 	/* Query parameters are inherited from the source request. */
 	dst->lockstate = src->lockstate;
 	dst->flags = src->flags;
 	dst->ventry = src->ventry;
 	dst->hash_gnu = src->hash_gnu;
 	dst->hash = src->hash;
 	dst->name = src->name;
 }
 
 
 /*
  * Parse a file descriptor number without pulling in more of libc (e.g. atoi).
  *
  * Accepts a non-empty string made only of decimal digits.  Returns the
  * parsed value, or -1 if the string is empty, contains any other
  * character, or names a value that does not fit in an int.
  */
 static int
 parse_libdir(const char *str)
 {
 	static const int RADIX = 10;  /* XXXJA: possibly support hex? */
 	/* Largest value of int, spelled without needing <limits.h>. */
 	static const int FD_MAX = (int)(~0U >> 1);
 	const char *orig;
 	int digit, fd;
 	char c;
 
 	orig = str;
 	fd = 0;
 	for (c = *str; c != '\0'; c = *++str) {
 		if (c < '0' || c > '9')
 			return (-1);
 		digit = c - '0';
 
 		/*
 		 * Reject the input before fd * RADIX + digit can exceed
 		 * FD_MAX; signed overflow is undefined behaviour.
 		 */
 		if (fd > (FD_MAX - digit) / RADIX)
 			return (-1);
 		fd = fd * RADIX + digit;
 	}
 
 	/* Make sure we actually parsed something. */
 	if (str == orig) {
 		_rtld_error("failed to parse directory FD from '%s'", str);
 		return (-1);
 	}
 	return (fd);
 }
 
 /*
  * Overrides for libc_pic-provided functions.
  */
 
 /*
  * Return the kernel's osreldate.  The value is read once with
  * sysctl(CTL_KERN, KERN_OSRELDATE) and cached in `osreldate'; if the
  * query fails or returns something unusable the cache is left as it was.
  */
 int
 __getosreldate(void)
 {
 	int mib[2];
 	size_t len;
 	int value;
 
 	if (osreldate != 0)
 		return (osreldate);
 
 	mib[0] = CTL_KERN;
 	mib[1] = KERN_OSRELDATE;
 	value = 0;
 	len = sizeof(value);
 	if (sysctl(mib, 2, &value, &len, NULL, 0) == 0 && value > 0 &&
 	    len == sizeof(value))
 		osreldate = value;
 	return (osreldate);
 }
 
 /*
  * rtld's own exit(): hands the status straight to _exit() and does
  * nothing else.
  */
 void
 exit(int status)
 {
 
 	_exit(status);
 }
 
 /*
  * Data definitions supplied by rtld itself.  Presumably these stand in
  * for libc symbols that the libc_pic code linked into rtld refers to;
  * TODO confirm against the libc sources.
  */
 void (*__cleanup)(void);
 int __isthreaded = 0;
 int _thread_autoinit_dummy_decl = 1;
 
 /*
  * No unresolved symbols for rtld.
  *
  * Empty stub: the argument is ignored and nothing is done.
  */
 void
 __pthread_cxa_finalize(struct dl_phdr_info *a)
 {
 }
 
 /*
  * Stack protector failure handler: record an error message and call
  * rtld_die().  __stack_chk_fail_local is declared as a weak reference to
  * the same function.
  */
 void
 __stack_chk_fail(void)
 {
 
 	_rtld_error("stack overflow detected; terminated");
 	rtld_die();
 }
 __weak_reference(__stack_chk_fail, __stack_chk_fail_local);
 
 /*
  * Buffer overflow check failure handler: record an error message and
  * call rtld_die().
  */
 void
 __chk_fail(void)
 {
 
 	_rtld_error("buffer overflow detected; terminated");
 	rtld_die();
 }
 
 /*
  * Map an errno value to its message in sys_errlist.  Values outside
  * [0, sys_nerr) yield "Unknown error".
  */
 const char *
 rtld_strerror(int errnum)
 {
 
 	if (errnum >= 0 && errnum < sys_nerr)
 		return (sys_errlist[errnum]);
 	return ("Unknown error");
 }
Index: projects/release-pkg/release/Makefile
===================================================================
--- projects/release-pkg/release/Makefile	(revision 293335)
+++ projects/release-pkg/release/Makefile	(revision 293336)
@@ -1,306 +1,310 @@
 # $FreeBSD$
 #
 # Makefile for building releases and release media.
 #
 # User-driven targets:
 #  cdrom: Builds release CD-ROM media (disc1.iso)
 #  dvdrom: Builds release DVD-ROM media (dvd1.iso)
 #  memstick: Builds memory stick image (memstick.img)
 #  mini-memstick: Builds minimal memory stick image (mini-memstick.img)
 #  ftp: Sets up FTP distribution area (ftp)
 #  release: Invokes real-release, vm-release, and cloudware-release targets
 #  real-release: Build all media and FTP distribution area
 #  vm-release: Build all virtual machine image targets
 #  cloudware-release: Build all cloud hosting provider targets
 #  install: Invokes the release-install and vm-install targets
 #  release-install: Copies all release installation media into ${DESTDIR}
 #  vm-install: Copies all virtual machine images into ${DESTDIR}
 #
 # Variables affecting the build process:
 #  WORLDDIR: location of src tree -- must have built world and default kernel
 #            (by default, the directory above this one)
 #  PORTSDIR: location of ports tree to distribute (default: /usr/ports)
 #  DOCDIR:   location of doc tree (default: /usr/doc)
 #  XTRADIR:  xtra-bits-dir argument for <arch>/mkisoimages.sh
 #  NOPKG:    if set, do not distribute third-party packages
 #  NOPORTS:  if set, do not distribute ports tree
 #  NOSRC:    if set, do not distribute source tree
 #  NODOC:    if set, do not generate release documentation
 #  WITH_DVD: if set, generate dvd1.iso
 #  WITH_COMPRESSED_IMAGES: if set, compress installation images with xz(1)
 #		(uncompressed images are not removed)
 #  WITH_VMIMAGES: if set, build virtual machine images with the release
 #  WITH_COMPRESSED_VMIMAGES: if set, compress virtual machine disk images
 #  		with xz(1) (extremely time consuming)
 #  WITH_CLOUDWARE: if set, build cloud hosting disk images with the release
 #  TARGET/TARGET_ARCH: architecture of built release
 #
 
 WORLDDIR?=	${.CURDIR}/..
 PORTSDIR?=	/usr/ports
 DOCDIR?=	/usr/doc
 RELNOTES_LANG?= en_US.ISO8859-1
 
 .if !defined(TARGET) || empty(TARGET)
 TARGET=		${MACHINE}
 .endif
 .if !defined(TARGET_ARCH) || empty(TARGET_ARCH)
 .if ${TARGET} == ${MACHINE}
 TARGET_ARCH=	${MACHINE_ARCH}
 .else
 TARGET_ARCH=	${TARGET}
 .endif
 .endif
 IMAKE=		${MAKE} TARGET_ARCH=${TARGET_ARCH} TARGET=${TARGET}
 DISTDIR=	dist
 
 # Define OSRELEASE by using newvers.sh
 .if !defined(OSRELEASE) || empty(OSRELEASE)
 .for _V in TYPE BRANCH REVISION
 ${_V}!=	eval $$(awk '/^${_V}=/{print}' ${.CURDIR}/../sys/conf/newvers.sh); echo $$${_V}
 .endfor
 .for _V in ${TARGET_ARCH}
 .if !empty(TARGET:M${_V})
 OSRELEASE=	${TYPE}-${REVISION}-${BRANCH}-${TARGET}
 VOLUME_LABEL=	${REVISION:C/[.-]/_/g}_${BRANCH:C/[.-]/_/g}_${TARGET}
 .else
 OSRELEASE=	${TYPE}-${REVISION}-${BRANCH}-${TARGET}-${TARGET_ARCH}
 VOLUME_LABEL=	${REVISION:C/[.-]/_/g}_${BRANCH:C/[.-]/_/g}_${TARGET_ARCH}
 .endif
 .endfor
 .endif
 
 .if !defined(VOLUME_LABEL) || empty(VOLUME_LABEL)
 VOLUME_LABEL=	FreeBSD_Install
 .endif
 
 .if !exists(${DOCDIR})
 NODOC= true
 .endif
 .if !exists(${PORTSDIR})
 NOPORTS= true
 .endif
 
 EXTRA_PACKAGES=
 .if !defined(NOPORTS)
 EXTRA_PACKAGES+= ports.txz
 .endif
 .if !defined(NOSRC)
 EXTRA_PACKAGES+= src.txz
 .endif
 .if !defined(NODOC)
 EXTRA_PACKAGES+= reldoc
 .endif
 
 RELEASE_TARGETS= ftp
 IMAGES=
 .if exists(${.CURDIR}/${TARGET}/mkisoimages.sh)
 RELEASE_TARGETS+= cdrom
 IMAGES+=	disc1.iso bootonly.iso
 . if defined(WITH_DVD) && !empty(WITH_DVD)
 RELEASE_TARGETS+= dvdrom
 IMAGES+=	dvd1.iso
 . endif
 .endif
 .if exists(${.CURDIR}/${TARGET}/make-memstick.sh)
 RELEASE_TARGETS+= memstick.img
 RELEASE_TARGETS+= mini-memstick.img
 IMAGES+=	memstick.img
 IMAGES+=	mini-memstick.img
 .endif
 
 CLEANFILES=	packagesystem *.txz MANIFEST release ${IMAGES}
 .if defined(WITH_COMPRESSED_IMAGES) && !empty(WITH_COMPRESSED_IMAGES)
 . for I in ${IMAGES}
 CLEANFILES+=	${I}.xz
 . endfor
 .endif
 .if defined(WITH_DVD) && !empty(WITH_DVD)
 CLEANFILES+=	pkg-stage
 .endif
 CLEANDIRS=	dist ftp disc1 bootonly dvd
 beforeclean:
 	chflags -R noschg .
 .include <bsd.obj.mk>
 clean: beforeclean
 
 base.txz:
 	mkdir -p ${DISTDIR}
 	cd ${WORLDDIR} && ${IMAKE} distributeworld DISTDIR=${.OBJDIR}/${DISTDIR}
 # Set up mergemaster root database
 	sh ${.CURDIR}/scripts/mm-mtree.sh -m ${WORLDDIR} -F \
 	    "TARGET_ARCH=${TARGET_ARCH} TARGET=${TARGET}" -D "${.OBJDIR}/${DISTDIR}/base"
 	etcupdate extract -B -M "TARGET_ARCH=${TARGET_ARCH} TARGET=${TARGET}" \
 	    -s ${WORLDDIR} -d "${.OBJDIR}/${DISTDIR}/base/var/db/etcupdate"
 # Package all components
 	cd ${WORLDDIR} && ${IMAKE} packageworld DISTDIR=${.OBJDIR}/${DISTDIR}
 	mv ${DISTDIR}/*.txz .
 
 kernel.txz:
 	mkdir -p ${DISTDIR}
 	cd ${WORLDDIR} && ${IMAKE} distributekernel packagekernel DISTDIR=${.OBJDIR}/${DISTDIR}
 	mv ${DISTDIR}/kernel*.txz .
 
 src.txz:
 	mkdir -p ${DISTDIR}/usr
 	ln -fs ${WORLDDIR} ${DISTDIR}/usr/src
 	cd ${DISTDIR} && tar cLvf - --exclude .svn --exclude .zfs \
 	    --exclude .git --exclude @ --exclude usr/src/release/dist usr/src | \
 	    ${XZ_CMD} > ${.OBJDIR}/src.txz
 
 ports.txz:
 	mkdir -p ${DISTDIR}/usr
 	ln -fs ${PORTSDIR} ${DISTDIR}/usr/ports
 	cd ${DISTDIR} && tar cLvf - \
 	    --exclude .git --exclude .svn \
 	    --exclude usr/ports/distfiles --exclude usr/ports/packages \
 	    --exclude 'usr/ports/INDEX*' --exclude work usr/ports | \
 	    ${XZ_CMD} > ${.OBJDIR}/ports.txz
 
 reldoc:
 	cd ${.CURDIR}/doc && ${MAKE} all install clean 'FORMATS=html txt' \
 	    INSTALL_COMPRESSED='' URLS_ABSOLUTE=YES DOCDIR=${.OBJDIR}/rdoc
 	mkdir -p reldoc
 .for i in hardware readme relnotes errata
 	ln -f rdoc/${RELNOTES_LANG}/${i}/article.txt reldoc/${i:tu}.TXT
 	ln -f rdoc/${RELNOTES_LANG}/${i}/article.html reldoc/${i:tu}.HTM
 .endfor
 	cp rdoc/${RELNOTES_LANG}/readme/docbook.css reldoc
 
 disc1: packagesystem
 # Install system
 	mkdir -p ${.TARGET}
 	cd ${WORLDDIR} && ${IMAKE} installkernel installworld distribution \
 		DESTDIR=${.OBJDIR}/${.TARGET} MK_RESCUE=no MK_KERNEL_SYMBOLS=no \
 		MK_PROFILE=no MK_SENDMAIL=no MK_TESTS=no MK_LIB32=no \
 		MK_DEBUG_FILES=no
 # Copy distfiles
 	mkdir -p ${.TARGET}/usr/freebsd-dist
 	for dist in MANIFEST $$(ls *.txz | grep -vE -- '(base|lib32)-dbg'); \
 	    do cp $${dist} ${.TARGET}/usr/freebsd-dist; \
 	done
 # Copy documentation, if generated
 .if !defined(NODOC)
 	cp reldoc/* ${.TARGET}
 .endif
 # Set up installation environment
 	ln -fs /tmp/bsdinstall_etc/resolv.conf ${.TARGET}/etc/resolv.conf
 	echo sendmail_enable=\"NONE\" > ${.TARGET}/etc/rc.conf
 	echo hostid_enable=\"NO\" >> ${.TARGET}/etc/rc.conf
 	echo debug.witness.trace=0 >> ${.TARGET}/etc/sysctl.conf
 	echo vfs.mountroot.timeout=\"10\" >> ${.TARGET}/boot/loader.conf
 	cp ${.CURDIR}/rc.local ${.TARGET}/etc
 	touch ${.TARGET}
 
 bootonly: packagesystem
 # Install system
 	mkdir -p ${.TARGET}
 	cd ${WORLDDIR} && ${IMAKE} installkernel installworld distribution \
 	    DESTDIR=${.OBJDIR}/${.TARGET} MK_AMD=no MK_AT=no \
 	    MK_GAMES=no MK_GROFF=no \
 	    MK_INSTALLLIB=no MK_LIB32=no MK_MAIL=no \
 	    MK_NCP=no MK_TOOLCHAIN=no MK_PROFILE=no \
 	    MK_INSTALLIB=no MK_RESCUE=no MK_DICT=no \
 	    MK_KERNEL_SYMBOLS=no MK_TESTS=no MK_DEBUG_FILES=no
 # Copy manifest only (no distfiles) to get checksums
 	mkdir -p ${.TARGET}/usr/freebsd-dist
 	cp MANIFEST ${.TARGET}/usr/freebsd-dist
 # Copy documentation, if generated
 .if !defined(NODOC)
 	cp reldoc/* ${.TARGET}
 .endif
 # Set up installation environment
 	ln -fs /tmp/bsdinstall_etc/resolv.conf ${.TARGET}/etc/resolv.conf
 	echo sendmail_enable=\"NONE\" > ${.TARGET}/etc/rc.conf
 	echo hostid_enable=\"NO\" >> ${.TARGET}/etc/rc.conf
 	echo debug.witness.trace=0 >> ${.TARGET}/etc/sysctl.conf
 	echo vfs.mountroot.timeout=\"10\" >> ${.TARGET}/boot/loader.conf
 	cp ${.CURDIR}/rc.local ${.TARGET}/etc
 
 dvd: packagesystem
 # Install system
 	mkdir -p ${.TARGET}
 	cd ${WORLDDIR} && ${IMAKE} installkernel installworld distribution \
 		DESTDIR=${.OBJDIR}/${.TARGET} MK_RESCUE=no MK_KERNEL_SYMBOLS=no \
 		MK_TESTS=no MK_DEBUG_FILES=no
 # Copy distfiles, leaving out the base-dbg and lib32-dbg sets
 	mkdir -p ${.TARGET}/usr/freebsd-dist
 	for dist in MANIFEST $$(ls *.txz | grep -vE -- '(base|lib32)-dbg'); \
 	    do cp $${dist} ${.TARGET}/usr/freebsd-dist; \
 	done
 # Copy documentation, if generated
 .if !defined(NODOC)
 	cp reldoc/* ${.TARGET}
 .endif
 # Set up installation environment
 	ln -fs /tmp/bsdinstall_etc/resolv.conf ${.TARGET}/etc/resolv.conf
 	echo sendmail_enable=\"NONE\" > ${.TARGET}/etc/rc.conf
 	echo hostid_enable=\"NO\" >> ${.TARGET}/etc/rc.conf
 	echo debug.witness.trace=0 >> ${.TARGET}/etc/sysctl.conf
 	echo vfs.mountroot.timeout=\"10\" >> ${.TARGET}/boot/loader.conf
 	cp ${.CURDIR}/rc.local ${.TARGET}/etc
 	touch ${.TARGET}
 
 release.iso: disc1.iso
 disc1.iso: disc1
 	sh ${.CURDIR}/${TARGET}/mkisoimages.sh -b ${VOLUME_LABEL}_CD ${.TARGET} disc1 ${XTRADIR}
 
 dvd1.iso: dvd pkg-stage
 	sh ${.CURDIR}/${TARGET}/mkisoimages.sh -b ${VOLUME_LABEL}_DVD ${.TARGET} dvd ${XTRADIR}
 
 bootonly.iso: bootonly
 	sh ${.CURDIR}/${TARGET}/mkisoimages.sh -b ${VOLUME_LABEL}_BO ${.TARGET} bootonly ${XTRADIR}
 
 memstick: memstick.img
 memstick.img: disc1
 	sh ${.CURDIR}/${TARGET}/make-memstick.sh disc1 ${.TARGET}
 
 mini-memstick: mini-memstick.img
 mini-memstick.img: bootonly
 	sh ${.CURDIR}/${TARGET}/make-memstick.sh bootonly ${.TARGET}
 
 packagesystem: base.txz kernel.txz ${EXTRA_PACKAGES}
 	sh ${.CURDIR}/scripts/make-manifest.sh *.txz > MANIFEST
 	touch ${.TARGET}
 
 pkg-stage:
 .if !defined(NOPKG)
 	env REPOS_DIR=${.CURDIR}/pkg_repos/ \
 		sh ${.CURDIR}/scripts/pkg-stage.sh
 	mkdir -p ${.OBJDIR}/dvd/packages/repos/
 	cp ${.CURDIR}/scripts/FreeBSD_install_cdrom.conf \
 		${.OBJDIR}/dvd/packages/repos/
 .endif
 	touch ${.TARGET}
 
 cdrom: disc1.iso bootonly.iso
 dvdrom: dvd1.iso
 ftp: packagesystem
 	rm -rf ftp
 	mkdir -p ftp
 	cp *.txz MANIFEST ftp
 
 release:	real-release vm-release cloudware-release
-	touch ${.OBJDIR}/${.TARGET}
+	${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} release-done
+	true
+
+release-done:
+	touch release
 
 real-release:
 	${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} obj
 	${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} ${RELEASE_TARGETS}
 
 install:	release-install vm-install cloudware-install
 
 release-install:
 .if defined(DESTDIR) && !empty(DESTDIR)
 	mkdir -p ${DESTDIR}
 .endif
 	cp -a ftp ${DESTDIR}/
 .for I in ${IMAGES}
 	cp -p ${I} ${DESTDIR}/${OSRELEASE}-${I}
 . if defined(WITH_COMPRESSED_IMAGES) && !empty(WITH_COMPRESSED_IMAGES)
 	${XZ_CMD} -k ${DESTDIR}/${OSRELEASE}-${I}
 . endif
 .endfor
 	cd ${DESTDIR} && sha512 ${OSRELEASE}* > ${DESTDIR}/CHECKSUM.SHA512
 	cd ${DESTDIR} && sha256 ${OSRELEASE}* > ${DESTDIR}/CHECKSUM.SHA256
 
 .include "${.CURDIR}/Makefile.vm"
Index: projects/release-pkg/sbin/geom/class/eli/Makefile
===================================================================
--- projects/release-pkg/sbin/geom/class/eli/Makefile	(revision 293335)
+++ projects/release-pkg/sbin/geom/class/eli/Makefile	(revision 293336)
@@ -1,18 +1,19 @@
 # $FreeBSD$
 
 .PATH: ${.CURDIR}/../../misc ${.CURDIR}/../../../../sys/geom/eli ${.CURDIR}/../../../../sys/crypto/sha2
 
 GEOM_CLASS=	eli
 SRCS=	g_eli_crypto.c
+SRCS+=	g_eli_hmac.c
 SRCS+=	g_eli_key.c
 SRCS+=	pkcs5v2.c
 SRCS+=	sha256c.c
 SRCS+=	sha512c.c
 
 LIBADD=	md crypto
 
 WARNS?=	3
 
 CFLAGS+=-I${.CURDIR}/../../../../sys
 
 .include <bsd.lib.mk>
Index: projects/release-pkg/sbin
===================================================================
--- projects/release-pkg/sbin	(revision 293335)
+++ projects/release-pkg/sbin	(revision 293336)

Property changes on: projects/release-pkg/sbin
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sbin:r289119,289158,289371,289383-289384,293171-293335
Index: projects/release-pkg/share/man/man7/ascii.7
===================================================================
--- projects/release-pkg/share/man/man7/ascii.7	(revision 293335)
+++ projects/release-pkg/share/man/man7/ascii.7	(revision 293336)
@@ -1,113 +1,113 @@
 .\" Copyright (c) 1989, 1990, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"	@(#)ascii.7	8.1 (Berkeley) 6/5/93
 .\" $FreeBSD$
 .\"
-.Dd June 5, 1993
+.Dd January 6, 2016
 .Dt ASCII 7
 .Os
 .Sh NAME
 .Nm ascii
 .Nd octal, hexadecimal and decimal
 .Tn ASCII
 character sets
 .Sh DESCRIPTION
 The
 .Nm octal
 set:
 .Bd -literal -offset left
 000 NUL  001 SOH  002 STX  003 ETX  004 EOT  005 ENQ  006 ACK  007 BEL
-010 BS   011 HT   012 NL   013 VT   014 NP   015 CR   016 SO   017 SI
+010 BS   011 HT   012 LF   013 VT   014 FF   015 CR   016 SO   017 SI
 020 DLE  021 DC1  022 DC2  023 DC3  024 DC4  025 NAK  026 SYN  027 ETB
 030 CAN  031 EM   032 SUB  033 ESC  034 FS   035 GS   036 RS   037 US
 040 SP   041  !   042  "   043  #   044  $   045  %   046  &   047  '
 050  (   051  )   052  *   053  +   054  ,   055  -   056  .   057  /
 060  0   061  1   062  2   063  3   064  4   065  5   066  6   067  7
 070  8   071  9   072  :   073  ;   074  <   075  =   076  >   077  ?
 100  @   101  A   102  B   103  C   104  D   105  E   106  F   107  G
 110  H   111  I   112  J   113  K   114  L   115  M   116  N   117  O
 120  P   121  Q   122  R   123  S   124  T   125  U   126  V   127  W
 130  X   131  Y   132  Z   133  [   134  \e\   135  ]   136  ^   137  _
 140  `   141  a   142  b   143  c   144  d   145  e   146  f   147  g
 150  h   151  i   152  j   153  k   154  l   155  m   156  n   157  o
 160  p   161  q   162  r   163  s   164  t   165  u   166  v   167  w
 170  x   171  y   172  z   173  {   174  |   175  }   176  ~   177 DEL
 .Ed
 .Pp
 The
 .Nm hexadecimal
 set:
 .Bd -literal -offset left
 00 NUL   01 SOH   02 STX   03 ETX   04 EOT   05 ENQ   06 ACK   07 BEL
-08 BS    09 HT    0A NL    0B VT    0C NP    0D CR    0E SO    0F SI
+08 BS    09 HT    0A LF    0B VT    0C FF    0D CR    0E SO    0F SI
 10 DLE   11 DC1   12 DC2   13 DC3   14 DC4   15 NAK   16 SYN   17 ETB
 18 CAN   19 EM    1A SUB   1B ESC   1C FS    1D GS    1E RS    1F US
 20 SP    21  !    22  "    23  #    24  $    25  %    26  &    27  '
 28  (    29  )    2a  *    2b  +    2c  ,    2d  -    2e  .    2f  /
 30  0    31  1    32  2    33  3    34  4    35  5    36  6    37  7
 38  8    39  9    3a  :    3b  ;    3c  <    3d  =    3e  >    3f  ?
 40  @    41  A    42  B    43  C    44  D    45  E    46  F    47  G
 48  H    49  I    4a  J    4b  K    4c  L    4d  M    4e  N    4f  O
 50  P    51  Q    52  R    53  S    54  T    55  U    56  V    57  W
 58  X    59  Y    5a  Z    5b  [    5c  \e\    5d  ]    5e  ^    5f  _
 60  \`    61  a    62  b    63  c    64  d    65  e    66  f    67  g
 68  h    69  i    6a  j    6b  k    6c  l    6d  m    6e  n    6f  o
 70  p    71  q    72  r    73  s    74  t    75  u    76  v    77  w
 78  x    79  y    7a  z    7b  {    7c  |    7d  }    7e  ~    7f DEL
 .Ed
 .Pp
 The
 .Nm decimal
 set:
 .Bd -literal -offset left
   0 NUL    1 SOH    2 STX    3 ETX    4 EOT    5 ENQ    6 ACK    7 BEL
-  8 BS     9 HT    10 NL    11 VT    12 NP    13 CR    14 SO    15 SI
+  8 BS     9 HT    10 LF    11 VT    12 FF    13 CR    14 SO    15 SI
  16 DLE   17 DC1   18 DC2   19 DC3   20 DC4   21 NAK   22 SYN   23 ETB
  24 CAN   25 EM    26 SUB   27 ESC   28 FS    29 GS    30 RS    31 US
  32 SP    33  !    34  "    35  #    36  $    37  %    38  &    39  '
  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /
  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7
  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?
  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G
  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O
  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W
  88  X    89  Y    90  Z    91  [    92  \e\    93  ]    94  ^    95  _
  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g
 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o
 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w
 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 DEL
 .Ed
 .Sh FILES
 .Bl -tag -width /usr/share/misc/ascii -compact
 .It Pa /usr/share/misc/ascii
 .El
 .Sh HISTORY
 An
 .Nm
 manual page appeared in
 .At v7 .
Index: projects/release-pkg/share/misc/ascii
===================================================================
--- projects/release-pkg/share/misc/ascii	(revision 293335)
+++ projects/release-pkg/share/misc/ascii	(revision 293336)
@@ -1,50 +1,50 @@
 |000 nul|001 soh|002 stx|003 etx|004 eot|005 enq|006 ack|007 bel|
-|010 bs |011 ht |012 nl |013 vt |014 np |015 cr |016 so |017 si |
+|010 bs |011 ht |012 lf |013 vt |014 ff |015 cr |016 so |017 si |
 |020 dle|021 dc1|022 dc2|023 dc3|024 dc4|025 nak|026 syn|027 etb|
 |030 can|031 em |032 sub|033 esc|034 fs |035 gs |036 rs |037 us |
 |040 sp |041  ! |042  " |043  # |044  $ |045  % |046  & |047  ' |
 |050  ( |051  ) |052  * |053  + |054  , |055  - |056  . |057  / |
 |060  0 |061  1 |062  2 |063  3 |064  4 |065  5 |066  6 |067  7 |
 |070  8 |071  9 |072  : |073  ; |074  < |075  = |076  > |077  ? |
 |100  @ |101  A |102  B |103  C |104  D |105  E |106  F |107  G |
 |110  H |111  I |112  J |113  K |114  L |115  M |116  N |117  O |
 |120  P |121  Q |122  R |123  S |124  T |125  U |126  V |127  W |
 |130  X |131  Y |132  Z |133  [ |134  \ |135  ] |136  ^ |137  _ |
 |140  ` |141  a |142  b |143  c |144  d |145  e |146  f |147  g |
 |150  h |151  i |152  j |153  k |154  l |155  m |156  n |157  o |
 |160  p |161  q |162  r |163  s |164  t |165  u |166  v |167  w |
 |170  x |171  y |172  z |173  { |174  | |175  } |176  ~ |177 del|
 
 | 00 nul| 01 soh| 02 stx| 03 etx| 04 eot| 05 enq| 06 ack| 07 bel|
-| 08 bs | 09 ht | 0a nl | 0b vt | 0c np | 0d cr | 0e so | 0f si |
+| 08 bs | 09 ht | 0a lf | 0b vt | 0c ff | 0d cr | 0e so | 0f si |
 | 10 dle| 11 dc1| 12 dc2| 13 dc3| 14 dc4| 15 nak| 16 syn| 17 etb|
 | 18 can| 19 em | 1a sub| 1b esc| 1c fs | 1d gs | 1e rs | 1f us |
 | 20 sp | 21  ! | 22  " | 23  # | 24  $ | 25  % | 26  & | 27  ' |
 | 28  ( | 29  ) | 2a  * | 2b  + | 2c  , | 2d  - | 2e  . | 2f  / |
 | 30  0 | 31  1 | 32  2 | 33  3 | 34  4 | 35  5 | 36  6 | 37  7 |
 | 38  8 | 39  9 | 3a  : | 3b  ; | 3c  < | 3d  = | 3e  > | 3f  ? |
 | 40  @ | 41  A | 42  B | 43  C | 44  D | 45  E | 46  F | 47  G |
 | 48  H | 49  I | 4a  J | 4b  K | 4c  L | 4d  M | 4e  N | 4f  O |
 | 50  P | 51  Q | 52  R | 53  S | 54  T | 55  U | 56  V | 57  W |
 | 58  X | 59  Y | 5a  Z | 5b  [ | 5c  \ | 5d  ] | 5e  ^ | 5f  _ |
 | 60  ` | 61  a | 62  b | 63  c | 64  d | 65  e | 66  f | 67  g |
 | 68  h | 69  i | 6a  j | 6b  k | 6c  l | 6d  m | 6e  n | 6f  o |
 | 70  p | 71  q | 72  r | 73  s | 74  t | 75  u | 76  v | 77  w |
 | 78  x | 79  y | 7a  z | 7b  { | 7c  | | 7d  } | 7e  ~ | 7f del|
 
 |  0 nul|  1 soh|  2 stx|  3 etx|  4 eot|  5 enq|  6 ack|  7 bel|
-|  8 bs |  9 ht | 10 nl | 11 vt | 12 np | 13 cr | 14 so | 15 si |
+|  8 bs |  9 ht | 10 lf | 11 vt | 12 ff | 13 cr | 14 so | 15 si |
 | 16 dle| 17 dc1| 18 dc2| 19 dc3| 20 dc4| 21 nak| 22 syn| 23 etb|
 | 24 can| 25 em | 26 sub| 27 esc| 28 fs | 29 gs | 30 rs | 31 us |
 | 32 sp | 33  ! | 34  " | 35  # | 36  $ | 37  % | 38  & | 39  ' |
 | 40  ( | 41  ) | 42  * | 43  + | 44  , | 45  - | 46  . | 47  / |
 | 48  0 | 49  1 | 50  2 | 51  3 | 52  4 | 53  5 | 54  6 | 55  7 |
 | 56  8 | 57  9 | 58  : | 59  ; | 60  < | 61  = | 62  > | 63  ? |
 | 64  @ | 65  A | 66  B | 67  C | 68  D | 69  E | 70  F | 71  G |
 | 72  H | 73  I | 74  J | 75  K | 76  L | 77  M | 78  N | 79  O |
 | 80  P | 81  Q | 82  R | 83  S | 84  T | 85  U | 86  V | 87  W |
 | 88  X | 89  Y | 90  Z | 91  [ | 92  \ | 93  ] | 94  ^ | 95  _ |
 | 96  ` | 97  a | 98  b | 99  c |100  d |101  e |102  f |103  g |
 |104  h |105  i |106  j |107  k |108  l |109  m |110  n |111  o |
 |112  p |113  q |114  r |115  s |116  t |117  u |118  v |119  w |
 |120  x |121  y |122  z |123  { |124  | |125  } |126  ~ |127 del|

Property changes on: projects/release-pkg/share/misc/ascii
___________________________________________________________________
Added: fbsd:nokeywords
## -0,0 +1 ##
+yes
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Index: projects/release-pkg/share
===================================================================
--- projects/release-pkg/share	(revision 293335)
+++ projects/release-pkg/share	(revision 293336)

Property changes on: projects/release-pkg/share
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/share:r293225-293335
Index: projects/release-pkg/sys/arm/arm/db_interface.c
===================================================================
--- projects/release-pkg/sys/arm/arm/db_interface.c	(revision 293335)
+++ projects/release-pkg/sys/arm/arm/db_interface.c	(revision 293336)
@@ -1,326 +1,330 @@
 /*	$NetBSD: db_interface.c,v 1.33 2003/08/25 04:51:10 mrg Exp $	*/
 
 /*-
  * Copyright (c) 1996 Scott K. Stevens
  *
  * Mach Operating System
  * Copyright (c) 1991,1990 Carnegie Mellon University
  * All Rights Reserved.
  *
  * Permission to use, copy, modify and distribute this software and its
  * documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  *
  *	From: db_interface.c,v 2.4 1991/02/05 17:11:13 mrt (CMU)
  */
 
 /*
  * Interface to new debugger.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/systm.h>	/* just for boothowto */
 #include <sys/exec.h>
 #ifdef KDB
 #include <sys/kdb.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
 
 #include <machine/db_machdep.h>
 #include <machine/machdep.h>
 #include <machine/vmparam.h>
 #include <machine/cpu.h>
 
 #include <ddb/ddb.h>
 #include <ddb/db_access.h>
 #include <ddb/db_command.h>
 #include <ddb/db_output.h>
 #include <ddb/db_variables.h>
 #include <ddb/db_sym.h>
 #include <sys/cons.h>
 
 static int nil = 0;
 
 int db_access_und_sp (struct db_variable *, db_expr_t *, int);
 int db_access_abt_sp (struct db_variable *, db_expr_t *, int);
 int db_access_irq_sp (struct db_variable *, db_expr_t *, int);
 
 static db_varfcn_t db_frame;
 
 #define DB_OFFSET(x)	(db_expr_t *)offsetof(struct trapframe, x)
 struct db_variable db_regs[] = {
 	{ "spsr", DB_OFFSET(tf_spsr),	db_frame },
 	{ "r0", DB_OFFSET(tf_r0),	db_frame },
 	{ "r1", DB_OFFSET(tf_r1),	db_frame },
 	{ "r2", DB_OFFSET(tf_r2),	db_frame },
 	{ "r3", DB_OFFSET(tf_r3),	db_frame },
 	{ "r4", DB_OFFSET(tf_r4),	db_frame },
 	{ "r5", DB_OFFSET(tf_r5),	db_frame },
 	{ "r6", DB_OFFSET(tf_r6),	db_frame },
 	{ "r7", DB_OFFSET(tf_r7),	db_frame },
 	{ "r8", DB_OFFSET(tf_r8),	db_frame },
 	{ "r9", DB_OFFSET(tf_r9),	db_frame },
 	{ "r10", DB_OFFSET(tf_r10),	db_frame },
 	{ "r11", DB_OFFSET(tf_r11),	db_frame },
 	{ "r12", DB_OFFSET(tf_r12),	db_frame },
 	{ "usr_sp", DB_OFFSET(tf_usr_sp), db_frame },
 	{ "usr_lr", DB_OFFSET(tf_usr_lr), db_frame },
 	{ "svc_sp", DB_OFFSET(tf_svc_sp), db_frame },
 	{ "svc_lr", DB_OFFSET(tf_svc_lr), db_frame },
 	{ "pc", DB_OFFSET(tf_pc), 	db_frame },
 	{ "und_sp", &nil, db_access_und_sp, },
 	{ "abt_sp", &nil, db_access_abt_sp, },
 	{ "irq_sp", &nil, db_access_irq_sp, },
 };
 
 struct db_variable *db_eregs = db_regs + sizeof(db_regs)/sizeof(db_regs[0]);
 
 int
 db_access_und_sp(struct db_variable *vp, db_expr_t *valp, int rw)
 {
 
 	if (rw == DB_VAR_GET) {
 		*valp = get_stackptr(PSR_UND32_MODE);
 		return (1);
 	}
 	return (0);
 }
 
 int
 db_access_abt_sp(struct db_variable *vp, db_expr_t *valp, int rw)
 {
 
 	if (rw == DB_VAR_GET) {
 		*valp = get_stackptr(PSR_ABT32_MODE);
 		return (1);
 	}
 	return (0);
 }
 
 int
 db_access_irq_sp(struct db_variable *vp, db_expr_t *valp, int rw)
 {
 
 	if (rw == DB_VAR_GET) {
 		*valp = get_stackptr(PSR_IRQ32_MODE);
 		return (1);
 	}
 	return (0);
 }
 
 int db_frame(struct db_variable *vp, db_expr_t *valp, int rw)
 {
 	int *reg;
 
 	if (kdb_frame == NULL)
 		return (0);
 
 	reg = (int *)((uintptr_t)kdb_frame + (db_expr_t)vp->valuep);
 	if (rw == DB_VAR_GET)
 		*valp = *reg;
 	else
 		*reg = *valp;
 	return (1);
 }
 
 void
 db_show_mdpcpu(struct pcpu *pc)
 {
+
+#if __ARM_ARCH >= 6
+	db_printf("curpmap      = %p\n", pc->pc_curpmap);
+#endif
 }
 int
 db_validate_address(vm_offset_t addr)
 {
 	struct proc *p = curproc;
 	struct pmap *pmap;
 
 	if (!p || !p->p_vmspace || !p->p_vmspace->vm_map.pmap ||
 #ifndef ARM32_NEW_VM_LAYOUT
 	    addr >= VM_MAXUSER_ADDRESS
 #else
 	    addr >= VM_MIN_KERNEL_ADDRESS
 #endif
 	   )
 		pmap = pmap_kernel();
 	else
 		pmap = p->p_vmspace->vm_map.pmap;
 
 	return (pmap_extract(pmap, addr) == FALSE);
 }
 
 /*
  * Read bytes from kernel address space for debugger.
  */
 int
 db_read_bytes(addr, size, data)
 	vm_offset_t	addr;
 	size_t	size;
 	char	*data;
 {
 	char	*src = (char *)addr;
 
 	if (db_validate_address((u_int)src)) {
 		db_printf("address %p is invalid\n", src);
 		return (-1);
 	}
 
 	if (size == 4 && (addr & 3) == 0 && ((uintptr_t)data & 3) == 0) {
 		*((int*)data) = *((int*)src);
 		return (0);
 	}
 
 	if (size == 2 && (addr & 1) == 0 && ((uintptr_t)data & 1) == 0) {
 		*((short*)data) = *((short*)src);
 		return (0);
 	}
 
 	while (size-- > 0) {
 		if (db_validate_address((u_int)src)) {
 			db_printf("address %p is invalid\n", src);
 			return (-1);
 		}
 		*data++ = *src++;
 	}
 	return (0);
 }
 
 /*
  * Write bytes to kernel address space for debugger.
  */
 int
 db_write_bytes(vm_offset_t addr, size_t size, char *data)
 {
 	char *dst;
 	size_t loop;
 
 	dst = (char *)addr;
 	if (db_validate_address((u_int)dst)) {
 		db_printf("address %p is invalid\n", dst);
 		return (0);
 	}
 
 	if (size == 4 && (addr & 3) == 0 && ((uintptr_t)data & 3) == 0)
 		*((int*)dst) = *((int*)data);
 	else
 	if (size == 2 && (addr & 1) == 0 && ((uintptr_t)data & 1) == 0)
 		*((short*)dst) = *((short*)data);
 	else {
 		loop = size;
 		while (loop-- > 0) {
 			if (db_validate_address((u_int)dst)) {
 				db_printf("address %p is invalid\n", dst);
 				return (-1);
 			}
 			*dst++ = *data++;
 		}
 	}
 
 	/* make sure the caches and memory are in sync */
 	cpu_icache_sync_range(addr, size);
 
 	/* In case the current page tables have been modified ... */
 	cpu_tlb_flushID();
 	cpu_cpwait();
 	return (0);
 }
 
 
 static u_int
 db_fetch_reg(int reg)
 {
 
 	switch (reg) {
 	case 0:
 		return (kdb_frame->tf_r0);
 	case 1:
 		return (kdb_frame->tf_r1);
 	case 2:
 		return (kdb_frame->tf_r2);
 	case 3:
 		return (kdb_frame->tf_r3);
 	case 4:
 		return (kdb_frame->tf_r4);
 	case 5:
 		return (kdb_frame->tf_r5);
 	case 6:
 		return (kdb_frame->tf_r6);
 	case 7:
 		return (kdb_frame->tf_r7);
 	case 8:
 		return (kdb_frame->tf_r8);
 	case 9:
 		return (kdb_frame->tf_r9);
 	case 10:
 		return (kdb_frame->tf_r10);
 	case 11:
 		return (kdb_frame->tf_r11);
 	case 12:
 		return (kdb_frame->tf_r12);
 	case 13:
 		return (kdb_frame->tf_svc_sp);
 	case 14:
 		return (kdb_frame->tf_svc_lr);
 	case 15:
 		return (kdb_frame->tf_pc);
 	default:
 		panic("db_fetch_reg: botch");
 	}
 }
 
 static u_int
 db_branch_taken_read_int(void *cookie __unused, vm_offset_t offset, u_int *val)
 {
 	u_int ret;
 
 	db_read_bytes(offset, 4, (char *)&ret);
 	*val = ret;
 
 	return (0);
 }
 
 static u_int
 db_branch_taken_fetch_reg(void *cookie __unused, int reg)
 {
 
 	return (db_fetch_reg(reg));
 }
 
 u_int
 branch_taken(u_int insn, db_addr_t pc)
 {
 	register_t new_pc;
 	int ret;
 
 	ret = arm_predict_branch(NULL, insn, (register_t)pc, &new_pc,
 	    db_branch_taken_fetch_reg, db_branch_taken_read_int);
 
 	if (ret != 0)
 		kdb_reenter();
 
 	return (new_pc);
 }
Index: projects/release-pkg/sys/boot/efi/boot1/boot1.c
===================================================================
--- projects/release-pkg/sys/boot/efi/boot1/boot1.c	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/boot1/boot1.c	(revision 293336)
@@ -1,576 +1,575 @@
 /*-
  * Copyright (c) 1998 Robert Nordier
  * All rights reserved.
  * Copyright (c) 2001 Robert Drehmel
  * All rights reserved.
  * Copyright (c) 2014 Nathan Whitehorn
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms are freely
  * permitted provided that the above copyright notice and this
  * paragraph and the following disclaimer are duplicated in all
  * such forms.
  *
  * This software is provided "AS IS" and without any express or
  * implied warranties, including, without limitation, the implied
  * warranties of merchantability and fitness for a particular
  * purpose.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/dirent.h>
 #include <machine/elf.h>
 #include <machine/stdarg.h>
 
 #include <efi.h>
 #include <eficonsctl.h>
 
 #define _PATH_LOADER	"/boot/loader.efi"
 #define _PATH_KERNEL	"/boot/kernel/kernel"
 
 #define BSIZEMAX	16384
 
 typedef int putc_func_t(char c, void *arg);
 
 struct sp_data {
 	char	*sp_buf;
 	u_int	sp_len;
 	u_int	sp_size;
 };
 
 static const char digits[] = "0123456789abcdef";
 
 static void panic(const char *fmt, ...) __dead2;
 static int printf(const char *fmt, ...);
 static int putchar(char c, void *arg);
 static int vprintf(const char *fmt, va_list ap);
 static int vsnprintf(char *str, size_t sz, const char *fmt, va_list ap);
 
 static int __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap);
 static int __putc(char c, void *arg);
 static int __puts(const char *s, putc_func_t *putc, void *arg);
 static int __sputc(char c, void *arg);
 static char *__uitoa(char *buf, u_int val, int base);
 static char *__ultoa(char *buf, u_long val, int base);
 
 static int domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet);
 static void load(const char *fname);
 
 static EFI_SYSTEM_TABLE *systab;
 static EFI_HANDLE *image;
 
 static void
 bcopy(const void *src, void *dst, size_t len)
 {
 	const char *s = src;
 	char *d = dst;
 
 	while (len-- != 0)
 		*d++ = *s++;
 }
 
 static void
 memcpy(void *dst, const void *src, size_t len)
 {
 	bcopy(src, dst, len);
 }
 
 static void
 bzero(void *b, size_t len)
 {
 	char *p = b;
 
 	while (len-- != 0)
 		*p++ = 0;
 }
 
 static int
 strcmp(const char *s1, const char *s2)
 {
 	for (; *s1 == *s2 && *s1; s1++, s2++)
 		;
 	return ((u_char)*s1 - (u_char)*s2);
 }
 
 static EFI_GUID BlockIoProtocolGUID = BLOCK_IO_PROTOCOL;
 static EFI_GUID DevicePathGUID = DEVICE_PATH_PROTOCOL;
 static EFI_GUID LoadedImageGUID = LOADED_IMAGE_PROTOCOL;
 static EFI_GUID ConsoleControlGUID = EFI_CONSOLE_CONTROL_PROTOCOL_GUID;
 
 static EFI_BLOCK_IO *bootdev;
 static EFI_DEVICE_PATH *bootdevpath;
 static EFI_HANDLE *bootdevhandle;
 
 EFI_STATUS efi_main(EFI_HANDLE Ximage, EFI_SYSTEM_TABLE* Xsystab)
 {
 	EFI_HANDLE handles[128];
 	EFI_BLOCK_IO *blkio;
 	UINTN i, nparts = sizeof(handles), cols, rows, max_dim, best_mode;
 	EFI_STATUS status;
 	EFI_DEVICE_PATH *devpath;
 	EFI_BOOT_SERVICES *BS;
 	EFI_CONSOLE_CONTROL_PROTOCOL *ConsoleControl = NULL;
 	SIMPLE_TEXT_OUTPUT_INTERFACE *conout = NULL;
 	char *path = _PATH_LOADER;
 
 	systab = Xsystab;
 	image = Ximage;
 
 	BS = systab->BootServices;
 	status = BS->LocateProtocol(&ConsoleControlGUID, NULL,
 	    (VOID **)&ConsoleControl);
 	if (status == EFI_SUCCESS)
 		(void)ConsoleControl->SetMode(ConsoleControl,
 		    EfiConsoleControlScreenText);
 	/*
 	 * Reset the console and find the best text mode.
 	 */
 	conout = systab->ConOut;
 	conout->Reset(conout, TRUE);
 	max_dim = best_mode = 0;
 	for (i = 0; ; i++) {
-		status = conout->QueryMode(conout, i,
-		    &cols, &rows);
+		status = conout->QueryMode(conout, i, &cols, &rows);
 		if (EFI_ERROR(status))
 			break;
 		if (cols * rows > max_dim) {
 			max_dim = cols * rows;
 			best_mode = i;
 		}
 	}
 	if (max_dim > 0)
 		conout->SetMode(conout, best_mode);
 	conout->EnableCursor(conout, TRUE);
 	conout->ClearScreen(conout);
 
 	printf("\n"
 	       ">> FreeBSD EFI boot block\n");
 	printf("   Loader path: %s\n", path);
 
 	status = systab->BootServices->LocateHandle(ByProtocol,
 	    &BlockIoProtocolGUID, NULL, &nparts, handles);
 	nparts /= sizeof(handles[0]);
 
 	for (i = 0; i < nparts; i++) {
 		status = systab->BootServices->HandleProtocol(handles[i],
 		    &DevicePathGUID, (void **)&devpath);
 		if (EFI_ERROR(status))
 			continue;
 
 		while (!IsDevicePathEnd(NextDevicePathNode(devpath)))
 			devpath = NextDevicePathNode(devpath);
 
 		status = systab->BootServices->HandleProtocol(handles[i],
 		    &BlockIoProtocolGUID, (void **)&blkio);
 		if (EFI_ERROR(status))
 			continue;
 
 		if (!blkio->Media->LogicalPartition)
 			continue;
 
 		if (domount(devpath, blkio, 1) >= 0)
 			break;
 	}
 
 	if (i == nparts)
 		panic("No bootable partition found");
 
 	bootdevhandle = handles[i];
 	load(path);
 
 	panic("Load failed");
 
 	return EFI_SUCCESS;
 }
 
 static int
 dskread(void *buf, u_int64_t lba, int nblk)
 {
 	EFI_STATUS status;
 	int size;
 
 	lba = lba / (bootdev->Media->BlockSize / DEV_BSIZE);
 	size = nblk * DEV_BSIZE;
 	status = bootdev->ReadBlocks(bootdev, bootdev->Media->MediaId, lba,
 	    size, buf);
 
 	if (EFI_ERROR(status))
 		return (-1);
 
 	return (0);
 }
 
 #include "ufsread.c"
 
 static ssize_t
 fsstat(ufs_ino_t inode)
 {
 #ifndef UFS2_ONLY
 	static struct ufs1_dinode dp1;
 	ufs1_daddr_t addr1;
 #endif
 #ifndef UFS1_ONLY
 	static struct ufs2_dinode dp2;
 #endif
 	static struct fs fs;
 	static ufs_ino_t inomap;
 	char *blkbuf;
 	void *indbuf;
 	size_t n, nb, size, off, vboff;
 	ufs_lbn_t lbn;
 	ufs2_daddr_t addr2, vbaddr;
 	static ufs2_daddr_t blkmap, indmap;
 	u_int u;
 
 	blkbuf = dmadat->blkbuf;
 	indbuf = dmadat->indbuf;
 	if (!dsk_meta) {
 		inomap = 0;
 		for (n = 0; sblock_try[n] != -1; n++) {
 			if (dskread(dmadat->sbbuf, sblock_try[n] / DEV_BSIZE,
 			    SBLOCKSIZE / DEV_BSIZE))
 				return -1;
 			memcpy(&fs, dmadat->sbbuf, sizeof(struct fs));
 			if ((
 #if defined(UFS1_ONLY)
 			    fs.fs_magic == FS_UFS1_MAGIC
 #elif defined(UFS2_ONLY)
 			    (fs.fs_magic == FS_UFS2_MAGIC &&
 			    fs.fs_sblockloc == sblock_try[n])
 #else
 			    fs.fs_magic == FS_UFS1_MAGIC ||
 			    (fs.fs_magic == FS_UFS2_MAGIC &&
 			    fs.fs_sblockloc == sblock_try[n])
 #endif
 			    ) &&
 			    fs.fs_bsize <= MAXBSIZE &&
 			    fs.fs_bsize >= sizeof(struct fs))
 				break;
 		}
 		if (sblock_try[n] == -1) {
 			printf("Not ufs\n");
 			return -1;
 		}
 		dsk_meta++;
 	} else
 		memcpy(&fs, dmadat->sbbuf, sizeof(struct fs));
 	if (!inode)
 		return 0;
 	if (inomap != inode) {
 		n = IPERVBLK(&fs);
 		if (dskread(blkbuf, INO_TO_VBA(&fs, n, inode), DBPERVBLK))
 			return -1;
 		n = INO_TO_VBO(n, inode);
 #if defined(UFS1_ONLY)
 		memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
 		    sizeof(struct ufs1_dinode));
 #elif defined(UFS2_ONLY)
 		memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
 		    sizeof(struct ufs2_dinode));
 #else
 		if (fs.fs_magic == FS_UFS1_MAGIC)
 			memcpy(&dp1, (struct ufs1_dinode *)blkbuf + n,
 			    sizeof(struct ufs1_dinode));
 		else
 			memcpy(&dp2, (struct ufs2_dinode *)blkbuf + n,
 			    sizeof(struct ufs2_dinode));
 #endif
 		inomap = inode;
 		fs_off = 0;
 		blkmap = indmap = 0;
 	}
 	size = DIP(di_size);
 	n = size - fs_off;
 	return (n);
 }
 
 static struct dmadat __dmadat;
 
 static int
 domount(EFI_DEVICE_PATH *device, EFI_BLOCK_IO *blkio, int quiet)
 {
 
 	dmadat = &__dmadat;
 	bootdev = blkio;
 	bootdevpath = device;
 	if (fsread(0, NULL, 0)) {
 		if (!quiet)
 			printf("domount: can't read superblock\n");
 		return (-1);
 	}
 	if (!quiet)
 		printf("Succesfully mounted UFS filesystem\n");
 	return (0);
 }
 
 static void
 load(const char *fname)
 {
 	ufs_ino_t ino;
 	EFI_STATUS status;
 	EFI_HANDLE loaderhandle;
 	EFI_LOADED_IMAGE *loaded_image;
 	void *buffer;
 	size_t bufsize;
 
 	if ((ino = lookup(fname)) == 0) {
 		printf("File %s not found\n", fname);
 		return;
 	}
 
 	bufsize = fsstat(ino);
 	status = systab->BootServices->AllocatePool(EfiLoaderData,
 	    bufsize, &buffer);
 	fsread(ino, buffer, bufsize);
 
 	/* XXX: For secure boot, we need our own loader here */
 	status = systab->BootServices->LoadImage(TRUE, image, bootdevpath,
 	    buffer, bufsize, &loaderhandle);
 	if (EFI_ERROR(status))
 		printf("LoadImage failed with error %lu\n",
-		    status & ~EFI_ERROR_MASK);
+		    EFI_ERROR_CODE(status));
 
 	status = systab->BootServices->HandleProtocol(loaderhandle,
 	    &LoadedImageGUID, (VOID**)&loaded_image);
 	if (EFI_ERROR(status))
 		printf("HandleProtocol failed with error %lu\n",
-		    status & ~EFI_ERROR_MASK);
+		    EFI_ERROR_CODE(status));
 
 	loaded_image->DeviceHandle = bootdevhandle;
 
 	status = systab->BootServices->StartImage(loaderhandle, NULL, NULL);
 	if (EFI_ERROR(status))
 		printf("StartImage failed with error %lu\n",
-		    status & ~EFI_ERROR_MASK);
+		    EFI_ERROR_CODE(status));
 }
 
 static void
 panic(const char *fmt, ...)
 {
 	char buf[128];
 	va_list ap;
 
 	va_start(ap, fmt);
 	vsnprintf(buf, sizeof buf, fmt, ap);
 	printf("panic: %s\n", buf);
 	va_end(ap);
 
 	while (1) {}
 }
 
 static int
 printf(const char *fmt, ...)
 {
 	va_list ap;
 	int ret;
 
 	/* Don't annoy the user as we probe for partitions */
 	if (strcmp(fmt,"Not ufs\n") == 0)
 		return 0;
 
 	va_start(ap, fmt);
 	ret = vprintf(fmt, ap);
 	va_end(ap);
 	return (ret);
 }
 
 static int
 putchar(char c, void *arg)
 {
 	CHAR16 buf[2];
 
 	if (c == '\n') {
 		buf[0] = '\r';
 		buf[1] = 0;
 		systab->ConOut->OutputString(systab->ConOut, buf);
 	}
 	buf[0] = c;
 	buf[1] = 0;
 	systab->ConOut->OutputString(systab->ConOut, buf);
 	return (1);
 }
 
 static int
 vprintf(const char *fmt, va_list ap)
 {
 	int ret;
 
 	ret = __printf(fmt, putchar, 0, ap);
 	return (ret);
 }
 
 static int
 vsnprintf(char *str, size_t sz, const char *fmt, va_list ap)
 {
 	struct sp_data sp;
 	int ret;
 
 	sp.sp_buf = str;
 	sp.sp_len = 0;
 	sp.sp_size = sz;
 	ret = __printf(fmt, __sputc, &sp, ap);
 	return (ret);
 }
 
 static int
 __printf(const char *fmt, putc_func_t *putc, void *arg, va_list ap)
 {
 	char buf[(sizeof(long) * 8) + 1];
 	char *nbuf;
 	u_long ul;
 	u_int ui;
 	int lflag;
 	int sflag;
 	char *s;
 	int pad;
 	int ret;
 	int c;
 
 	nbuf = &buf[sizeof buf - 1];
 	ret = 0;
 	while ((c = *fmt++) != 0) {
 		if (c != '%') {
 			ret += putc(c, arg);
 			continue;
 		}
 		lflag = 0;
 		sflag = 0;
 		pad = 0;
 reswitch:	c = *fmt++;
 		switch (c) {
 		case '#':
 			sflag = 1;
 			goto reswitch;
 		case '%':
 			ret += putc('%', arg);
 			break;
 		case 'c':
 			c = va_arg(ap, int);
 			ret += putc(c, arg);
 			break;
 		case 'd':
 			if (lflag == 0) {
 				ui = (u_int)va_arg(ap, int);
 				if (ui < (int)ui) {
 					ui = -ui;
 					ret += putc('-', arg);
 				}
 				s = __uitoa(nbuf, ui, 10);
 			} else {
 				ul = (u_long)va_arg(ap, long);
 				if (ul < (long)ul) {
 					ul = -ul;
 					ret += putc('-', arg);
 				}
 				s = __ultoa(nbuf, ul, 10);
 			}
 			ret += __puts(s, putc, arg);
 			break;
 		case 'l':
 			lflag = 1;
 			goto reswitch;
 		case 'o':
 			if (lflag == 0) {
 				ui = (u_int)va_arg(ap, u_int);
 				s = __uitoa(nbuf, ui, 8);
 			} else {
 				ul = (u_long)va_arg(ap, u_long);
 				s = __ultoa(nbuf, ul, 8);
 			}
 			ret += __puts(s, putc, arg);
 			break;
 		case 'p':
 			ul = (u_long)va_arg(ap, void *);
 			s = __ultoa(nbuf, ul, 16);
 			ret += __puts("0x", putc, arg);
 			ret += __puts(s, putc, arg);
 			break;
 		case 's':
 			s = va_arg(ap, char *);
 			ret += __puts(s, putc, arg);
 			break;
 		case 'u':
 			if (lflag == 0) {
 				ui = va_arg(ap, u_int);
 				s = __uitoa(nbuf, ui, 10);
 			} else {
 				ul = va_arg(ap, u_long);
 				s = __ultoa(nbuf, ul, 10);
 			}
 			ret += __puts(s, putc, arg);
 			break;
 		case 'x':
 			if (lflag == 0) {
 				ui = va_arg(ap, u_int);
 				s = __uitoa(nbuf, ui, 16);
 			} else {
 				ul = va_arg(ap, u_long);
 				s = __ultoa(nbuf, ul, 16);
 			}
 			if (sflag)
 				ret += __puts("0x", putc, arg);
 			ret += __puts(s, putc, arg);
 			break;
 		case '0': case '1': case '2': case '3': case '4':
 		case '5': case '6': case '7': case '8': case '9':
 			pad = pad * 10 + c - '0';
 			goto reswitch;
 		default:
 			break;
 		}
 	}
 	return (ret);
 }
 
 static int
 __sputc(char c, void *arg)
 {
 	struct sp_data *sp;
 
 	sp = arg;
 	if (sp->sp_len < sp->sp_size)
 		sp->sp_buf[sp->sp_len++] = c;
 	sp->sp_buf[sp->sp_len] = '\0';
 	return (1);
 }
 
 static int
 __puts(const char *s, putc_func_t *putc, void *arg)
 {
 	const char *p;
 	int ret;
 
 	ret = 0;
 	for (p = s; *p != '\0'; p++)
 		ret += putc(*p, arg);
 	return (ret);
 }
 
 static char *
 __uitoa(char *buf, u_int ui, int base)
 {
 	char *p;
 
 	p = buf;
 	*p = '\0';
 	do
 		*--p = digits[ui % base];
 	while ((ui /= base) != 0);
 	return (p);
 }
 
 static char *
 __ultoa(char *buf, u_long ul, int base)
 {
 	char *p;
 
 	p = buf;
 	*p = '\0';
 	do
 		*--p = digits[ul % base];
 	while ((ul /= base) != 0);
 	return (p);
 }
Index: projects/release-pkg/sys/boot/efi/include/amd64/efibind.h
===================================================================
--- projects/release-pkg/sys/boot/efi/include/amd64/efibind.h	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/include/amd64/efibind.h	(revision 293336)
@@ -1,271 +1,271 @@
 /* $FreeBSD$ */
 /*++
 
 Copyright (c)  1999 - 2003 Intel Corporation. All rights reserved
 This software and associated documentation (if any) is furnished
 under a license and may only be used or copied in accordance
 with the terms of the license. Except as permitted by such
 license, no part of this software or documentation may be
 reproduced, stored in a retrieval system, or transmitted in any
 form or by any means without the express written consent of
 Intel Corporation.
 
 Module Name:
 
     efefind.h
 
 Abstract:
 
     EFI to compile bindings
 
 
 
 
 Revision History
 
 --*/
 
 #pragma pack()
 
 
 #ifdef __FreeBSD__
 #include <sys/stdint.h>
 #else
 //
 // Basic int types of various widths
 //
 
 #if (__STDC_VERSION__ < 199901L )
 
     // No ANSI C 1999/2000 stdint.h integer width declarations 
 
-    #if _MSC_EXTENSIONS
+    #ifdef _MSC_EXTENSIONS
 
         // Use Microsoft C compiler integer width declarations 
 
         typedef unsigned __int64    uint64_t;
         typedef __int64             int64_t;
         typedef unsigned __int32    uint32_t;
         typedef __int32             int32_t;
         typedef unsigned short      uint16_t;
         typedef short               int16_t;
         typedef unsigned char       uint8_t;
         typedef char                int8_t;
     #else             
         #ifdef UNIX_LP64
 
             // Use LP64 programming model from C_FLAGS for integer width declarations 
 
             typedef unsigned long       uint64_t;
             typedef long                int64_t;
             typedef unsigned int        uint32_t;
             typedef int                 int32_t;
             typedef unsigned short      uint16_t;
             typedef short               int16_t;
             typedef unsigned char       uint8_t;
             typedef char                int8_t;
         #else
 
             // Assume P64 programming model from C_FLAGS for integer width declarations 
 
             typedef unsigned long long  uint64_t;
             typedef long long           int64_t;
             typedef unsigned int        uint32_t;
             typedef int                 int32_t;
             typedef unsigned short      uint16_t;
             typedef short               int16_t;
             typedef unsigned char       uint8_t;
             typedef char                int8_t;
         #endif
     #endif
 #endif
 #endif	/* __FreeBSD__ */
 
 //
 // Basic EFI types of various widths
 //
 
 #ifndef ACPI_THREAD_ID		/* ACPI's definitions are fine */
 #define ACPI_USE_SYSTEM_INTTYPES 1	/* Tell ACPI we've defined types */
 
 typedef uint64_t   UINT64;
 typedef int64_t    INT64;
 
 #ifndef _BASETSD_H_
     typedef uint32_t   UINT32;
     typedef int32_t    INT32;
 #endif
 
 typedef uint16_t   UINT16;
 typedef int16_t    INT16;
 typedef uint8_t    UINT8;
 typedef int8_t     INT8;
 
 #endif
 
 #undef VOID
 #define VOID    void
 
 
 typedef int64_t    INTN;
 typedef uint64_t   UINTN;
 
 #ifdef EFI_NT_EMULATOR
     #define POST_CODE(_Data)
 #else    
     #ifdef EFI_DEBUG
 #define POST_CODE(_Data)    __asm mov eax,(_Data) __asm out 0x80,al
     #else
         #define POST_CODE(_Data)
     #endif  
 #endif
 
 #define EFIERR(a)           (0x8000000000000000 | a)
 #define EFI_ERROR_MASK      0x8000000000000000
 #define EFIERR_OEM(a)       (0xc000000000000000 | a)      
 
 
 #define BAD_POINTER         0xFBFBFBFBFBFBFBFB
 #define MAX_ADDRESS         0xFFFFFFFFFFFFFFFF
 
 #define BREAKPOINT()        __asm { int 3 }
 
 //
 // Pointers must be aligned to these address to function
 //
 
 #define MIN_ALIGNMENT_SIZE  4
 
 #define ALIGN_VARIABLE(Value ,Adjustment) \
             (UINTN)Adjustment = 0; \
             if((UINTN)Value % MIN_ALIGNMENT_SIZE) \
                 (UINTN)Adjustment = MIN_ALIGNMENT_SIZE - ((UINTN)Value % MIN_ALIGNMENT_SIZE); \
             Value = (UINTN)Value + (UINTN)Adjustment
 
 
 //
 // Define macros to build data structure signatures from characters.
 //
 
 #define EFI_SIGNATURE_16(A,B)             ((A) | (B<<8))
 #define EFI_SIGNATURE_32(A,B,C,D)         (EFI_SIGNATURE_16(A,B)     | (EFI_SIGNATURE_16(C,D)     << 16))
 #define EFI_SIGNATURE_64(A,B,C,D,E,F,G,H) (EFI_SIGNATURE_32(A,B,C,D) | ((UINT64)(EFI_SIGNATURE_32(E,F,G,H)) << 32))
 
 //
 // EFIAPI - prototype calling convention for EFI function pointers
 // BOOTSERVICE - prototype for implementation of a boot service interface
 // RUNTIMESERVICE - prototype for implementation of a runtime service interface
 // RUNTIMEFUNCTION - prototype for implementation of a runtime function that is not a service
 // RUNTIME_CODE - pragma macro for declaring runtime code    
 //
 
 #ifdef	__amd64__
 #define	EFIAPI	__attribute__((ms_abi))
 #endif
 
 #ifndef EFIAPI                  // Forces EFI calling conventions reguardless of compiler options 
-    #if _MSC_EXTENSIONS
+    #ifdef _MSC_EXTENSIONS
         #define EFIAPI __cdecl  // Force C calling convention for Microsoft C compiler 
     #else
         #define EFIAPI          // Substitute expresion to force C calling convention 
     #endif
 #endif
 
 #define BOOTSERVICE
 //#define RUNTIMESERVICE(proto,a)    alloc_text("rtcode",a); proto a
 //#define RUNTIMEFUNCTION(proto,a)   alloc_text("rtcode",a); proto a
 #define RUNTIMESERVICE
 #define RUNTIMEFUNCTION
 
 
 #define RUNTIME_CODE(a)         alloc_text("rtcode", a)
 #define BEGIN_RUNTIME_DATA()    data_seg("rtdata")
 #define END_RUNTIME_DATA()      data_seg("")
 
 #define VOLATILE    volatile
 
 #define MEMORY_FENCE()    
 
 #ifdef EFI_NO_INTERFACE_DECL
   #define EFI_FORWARD_DECLARATION(x)
   #define EFI_INTERFACE_DECL(x)
 #else
   #define EFI_FORWARD_DECLARATION(x) typedef struct _##x x
   #define EFI_INTERFACE_DECL(x) typedef struct x
 #endif
 
 #ifdef EFI_NT_EMULATOR
 
 //
 // To help ensure proper coding of integrated drivers, they are
 // compiled as DLLs.  In NT they require a dll init entry pointer.
 // The macro puts a stub entry point into the DLL so it will load.
 //
 
 #define EFI_DRIVER_ENTRY_POINT(InitFunction)            \
     EFI_STATUS                                          \
     InitFunction (                                      \
       EFI_HANDLE  ImageHandle,                          \
       EFI_SYSTEM_TABLE  *SystemTable                    \
       );                                                \
                                                         \
     UINTN                                               \
     __stdcall                                           \
     _DllMainCRTStartup (                                \
         UINTN    Inst,                                  \
         UINTN    reason_for_call,                       \
         VOID    *rserved                                \
         )                                               \
     {                                                   \
         return 1;                                       \
     }                                                   \
                                                         \
     int                                                 \
     __declspec( dllexport )                             \
     __cdecl                                             \
     InitializeDriver (                                  \
         void *ImageHandle,                              \
         void *SystemTable                               \
         )                                               \
     {                                                   \
         return InitFunction(ImageHandle, SystemTable);  \
     }
 
 
     #define LOAD_INTERNAL_DRIVER(_if, type, name, entry)      \
         (_if)->LoadInternal(type, name, NULL)             
 
 #else // EFI_NT_EMULATOR 
 
 //
 // When build similiar to FW, then link everything together as
 // one big module.
 //
 
     #define EFI_DRIVER_ENTRY_POINT(InitFunction)
 
     #define LOAD_INTERNAL_DRIVER(_if, type, name, entry)    \
             (_if)->LoadInternal(type, name, entry)
 
 #endif // EFI_FW_NT 
 
 #ifdef __FreeBSD__
 #define INTERFACE_DECL(x) struct x
 #else
 //
 // Some compilers don't support the forward reference construct:
 //  typedef struct XXXXX
 //
 // The following macro provide a workaround for such cases.
 //
 #ifdef NO_INTERFACE_DECL
 #define INTERFACE_DECL(x)
 #else
 #define INTERFACE_DECL(x) typedef struct x
 #endif
 #endif	/* __FreeBSD__ */
 
-#if _MSC_EXTENSIONS
+#ifdef _MSC_EXTENSIONS
 #pragma warning ( disable : 4731 )  // Suppress warnings about modification of EBP
 #endif
 
Index: projects/release-pkg/sys/boot/efi/include/arm64/efibind.h
===================================================================
--- projects/release-pkg/sys/boot/efi/include/arm64/efibind.h	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/include/arm64/efibind.h	(revision 293336)
@@ -1,219 +1,219 @@
 /* $FreeBSD$ */
 /*++
 
 Copyright (c)  1999 - 2003 Intel Corporation. All rights reserved
 This software and associated documentation (if any) is furnished
 under a license and may only be used or copied in accordance
 with the terms of the license. Except as permitted by such
 license, no part of this software or documentation may be
 reproduced, stored in a retrieval system, or transmitted in any
 form or by any means without the express written consent of
 Intel Corporation.
 
 Module Name:
 
     efefind.h
 
 Abstract:
 
     EFI to compile bindings
 
 
 
 
 Revision History
 
 --*/
 
 #pragma pack()
 
 
 #ifdef __FreeBSD__
 #include <sys/stdint.h>
 #else
 //
 // Basic int types of various widths
 //
 
 #if (__STDC_VERSION__ < 199901L )
 
     // No ANSI C 1999/2000 stdint.h integer width declarations 
 
-    #if _MSC_EXTENSIONS
+    #ifdef _MSC_EXTENSIONS
 
         // Use Microsoft C compiler integer width declarations 
 
         typedef unsigned __int64    uint64_t;
         typedef __int64             int64_t;
         typedef unsigned __int32    uint32_t;
         typedef __int32             int32_t;
         typedef unsigned __int16    uint16_t;
         typedef __int16             int16_t;
         typedef unsigned __int8     uint8_t;
         typedef __int8              int8_t;
     #else             
         #ifdef UNIX_LP64
 
             // Use LP64 programming model from C_FLAGS for integer width declarations 
 
             typedef unsigned long       uint64_t;
             typedef long                int64_t;
             typedef unsigned int        uint32_t;
             typedef int                 int32_t;
             typedef unsigned short      uint16_t;
             typedef short               int16_t;
             typedef unsigned char       uint8_t;
             typedef char                int8_t;
         #else
 
             // Assume P64 programming model from C_FLAGS for integer width declarations 
 
             typedef unsigned long long  uint64_t;
             typedef long long           int64_t;
             typedef unsigned int        uint32_t;
             typedef int                 int32_t;
             typedef unsigned short      uint16_t;
             typedef short               int16_t;
             typedef unsigned char       uint8_t;
             typedef char                int8_t;
         #endif
     #endif
 #endif
 #endif	/* __FreeBSD__ */
 
 //
 // Basic EFI types of various widths
 //
 
 
 typedef uint64_t   UINT64;
 typedef int64_t    INT64;
 typedef uint32_t   UINT32;
 typedef int32_t    INT32;
 typedef uint16_t   UINT16;
 typedef int16_t    INT16;
 typedef uint8_t    UINT8;
 typedef int8_t     INT8;
 
 
 #undef VOID
 #define VOID    void
 
 
 typedef int64_t    INTN;
 typedef uint64_t   UINTN;
 
 //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 // BugBug: Code to debug
 //
 #define BIT63   0x8000000000000000
 
 #define PLATFORM_IOBASE_ADDRESS   (0xffffc000000 | BIT63)                                               
 #define PORT_TO_MEMD(_Port) (PLATFORM_IOBASE_ADDRESS | ( ( ( (_Port) & 0xfffc) << 10 ) | ( (_Port) & 0x0fff) ) )
                                                                            
 //                                                                  
 // Macro's with casts make this much easier to use and read.
 //
 #define PORT_TO_MEM8D(_Port)  (*(UINT8  *)(PORT_TO_MEMD(_Port)))
 #define POST_CODE(_Data)  (PORT_TO_MEM8D(0x80) = (_Data))
 //
 // BugBug: End Debug Code!!!
 //+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 #define EFIERR(a)           (0x8000000000000000 | a)
 #define EFI_ERROR_MASK      0x8000000000000000
 #define EFIERR_OEM(a)       (0xc000000000000000 | a)      
 
 #define BAD_POINTER         0xFBFBFBFBFBFBFBFB
 #define MAX_ADDRESS         0xFFFFFFFFFFFFFFFF
 
 #pragma intrinsic (__break)  
 #define BREAKPOINT()  __break(0)
 
 //
 // Pointers must be aligned to these address to function
 //  you will get an alignment fault if this value is less than 8
 //
 #define MIN_ALIGNMENT_SIZE  8
 
 #define ALIGN_VARIABLE(Value , Adjustment) \
             (UINTN) Adjustment = 0; \
             if((UINTN)Value % MIN_ALIGNMENT_SIZE) \
                 (UINTN)Adjustment = MIN_ALIGNMENT_SIZE - ((UINTN)Value % MIN_ALIGNMENT_SIZE); \
             Value = (UINTN)Value + (UINTN)Adjustment
 
 //
 // Define macros to create data structure signatures.
 //
 
 #define EFI_SIGNATURE_16(A,B)             ((A) | (B<<8))
 #define EFI_SIGNATURE_32(A,B,C,D)         (EFI_SIGNATURE_16(A,B)     | (EFI_SIGNATURE_16(C,D)     << 16))
 #define EFI_SIGNATURE_64(A,B,C,D,E,F,G,H) (EFI_SIGNATURE_32(A,B,C,D) | ((UINT64)(EFI_SIGNATURE_32(E,F,G,H)) << 32))
 
 //
 // EFIAPI - prototype calling convention for EFI function pointers
 // BOOTSERVICE - prototype for implementation of a boot service interface
 // RUNTIMESERVICE - prototype for implementation of a runtime service interface
 // RUNTIMEFUNCTION - prototype for implementation of a runtime function that is not a service
 // RUNTIME_CODE - pragma macro for declaring runtime code    
 //
 
 #ifndef EFIAPI                  // Forces EFI calling conventions reguardless of compiler options 
-    #if _MSC_EXTENSIONS
+    #ifdef _MSC_EXTENSIONS
         #define EFIAPI __cdecl  // Force C calling convention for Microsoft C compiler 
     #else
         #define EFIAPI          // Substitute expresion to force C calling convention 
     #endif
 #endif
 
 #define BOOTSERVICE
 #define RUNTIMESERVICE
 #define RUNTIMEFUNCTION
 
 #define RUNTIME_CODE(a)         alloc_text("rtcode", a)
 #define BEGIN_RUNTIME_DATA()    data_seg("rtdata")
 #define END_RUNTIME_DATA()      data_seg()
 
 #define VOLATILE    volatile
 
 //
 // BugBug: Need to find out if this is portable accross compliers.
 //
 void __mfa (void);                       
 #pragma intrinsic (__mfa)  
 #define MEMORY_FENCE()    __mfa()
 
 #ifdef EFI_NO_INTERFACE_DECL
   #define EFI_FORWARD_DECLARATION(x)
   #define EFI_INTERFACE_DECL(x)
 #else
   #define EFI_FORWARD_DECLARATION(x) typedef struct _##x x
   #define EFI_INTERFACE_DECL(x) typedef struct x
 #endif
 
 //
 // When build similiar to FW, then link everything together as
 // one big module.
 //
 
 #define EFI_DRIVER_ENTRY_POINT(InitFunction)
 
 #define LOAD_INTERNAL_DRIVER(_if, type, name, entry)    \
             (_if)->LoadInternal(type, name, entry)
 //        entry(NULL, ST)
 
 #ifdef __FreeBSD__
 #define INTERFACE_DECL(x) struct x
 #else
 //
 // Some compilers don't support the forward reference construct:
 //  typedef struct XXXXX
 //
 // The following macro provide a workaround for such cases.
 //
 #ifdef NO_INTERFACE_DECL
 #define INTERFACE_DECL(x)
 #else
 #define INTERFACE_DECL(x) typedef struct x
 #endif
 #endif
Index: projects/release-pkg/sys/boot/efi/include/efierr.h
===================================================================
--- projects/release-pkg/sys/boot/efi/include/efierr.h	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/include/efierr.h	(revision 293336)
@@ -1,67 +1,68 @@
 /* $FreeBSD$ */
 #ifndef _EFI_ERR_H
 #define _EFI_ERR_H
 
 /*++
 
 Copyright (c)  1999 - 2002 Intel Corporation. All rights reserved
 This software and associated documentation (if any) is furnished
 under a license and may only be used or copied in accordance
 with the terms of the license. Except as permitted by such
 license, no part of this software or documentation may be
 reproduced, stored in a retrieval system, or transmitted in any
 form or by any means without the express written consent of
 Intel Corporation.
 
 Module Name:
 
     efierr.h
 
 Abstract:
 
     EFI error codes
 
 
 
 
 Revision History
 
 --*/
 
 
 #define EFIWARN(a)                            (a)
-#define EFI_ERROR(a)              (((INTN) a) < 0)
+#define EFI_ERROR(a)             (((INTN)(a)) < 0)	/* true when the top (sign) bit of the INTN-wide status is set */
+#define EFI_ERROR_CODE(a)   ((a) & ~EFI_ERROR_MASK)	/* status with the EFI_ERROR_MASK bit cleared */
 
 
 #define EFI_SUCCESS                             0
 #define EFI_LOAD_ERROR                  EFIERR(1)
 #define EFI_INVALID_PARAMETER           EFIERR(2)
 #define EFI_UNSUPPORTED                 EFIERR(3)
 #define EFI_BAD_BUFFER_SIZE             EFIERR(4)
 #define EFI_BUFFER_TOO_SMALL            EFIERR(5)
 #define EFI_NOT_READY                   EFIERR(6)
 #define EFI_DEVICE_ERROR                EFIERR(7)
 #define EFI_WRITE_PROTECTED             EFIERR(8)
 #define EFI_OUT_OF_RESOURCES            EFIERR(9)
 #define EFI_VOLUME_CORRUPTED            EFIERR(10)
 #define EFI_VOLUME_FULL                 EFIERR(11)
 #define EFI_NO_MEDIA                    EFIERR(12)
 #define EFI_MEDIA_CHANGED               EFIERR(13)
 #define EFI_NOT_FOUND                   EFIERR(14)
 #define EFI_ACCESS_DENIED               EFIERR(15)
 #define EFI_NO_RESPONSE                 EFIERR(16)
 #define EFI_NO_MAPPING                  EFIERR(17)
 #define EFI_TIMEOUT                     EFIERR(18)
 #define EFI_NOT_STARTED                 EFIERR(19)
 #define EFI_ALREADY_STARTED             EFIERR(20)
 #define EFI_ABORTED                     EFIERR(21)
 #define EFI_ICMP_ERROR                  EFIERR(22)
 #define EFI_TFTP_ERROR                  EFIERR(23)
 #define EFI_PROTOCOL_ERROR              EFIERR(24)
 
 #define EFI_WARN_UNKNOWN_GLYPH          EFIWARN(1)
 #define EFI_WARN_DELETE_FAILURE         EFIWARN(2)
 #define EFI_WARN_WRITE_FAILURE          EFIWARN(3)
 #define EFI_WARN_BUFFER_TOO_SMALL       EFIWARN(4)
 
 #endif
Index: projects/release-pkg/sys/boot/efi/include/i386/efibind.h
===================================================================
--- projects/release-pkg/sys/boot/efi/include/i386/efibind.h	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/include/i386/efibind.h	(revision 293336)
@@ -1,267 +1,267 @@
 /* $FreeBSD$ */
 /*++
 
 Copyright (c)  1999 - 2003 Intel Corporation. All rights reserved
 This software and associated documentation (if any) is furnished
 under a license and may only be used or copied in accordance
 with the terms of the license. Except as permitted by such
 license, no part of this software or documentation may be
 reproduced, stored in a retrieval system, or transmitted in any
 form or by any means without the express written consent of
 Intel Corporation.
 
 Module Name:
 
     efefind.h
 
 Abstract:
 
     EFI to compile bindings
 
 
 
 
 Revision History
 
 --*/
 
 #pragma pack()
 
 
 #ifdef __FreeBSD__
 #include <sys/stdint.h>
 #else
 //
 // Basic int types of various widths
 //
 
 #if (__STDC_VERSION__ < 199901L )
 
     // No ANSI C 1999/2000 stdint.h integer width declarations 
 
-    #if _MSC_EXTENSIONS
+    #ifdef _MSC_EXTENSIONS
 
         // Use Microsoft C compiler integer width declarations 
 
         typedef unsigned __int64    uint64_t;
         typedef __int64             int64_t;
         typedef unsigned __int32    uint32_t;
         typedef __int32             int32_t;
         typedef unsigned short      uint16_t;
         typedef short               int16_t;
         typedef unsigned char       uint8_t;
         typedef char                int8_t;
     #else             
         #ifdef UNIX_LP64
 
             // Use LP64 programming model from C_FLAGS for integer width declarations 
 
             typedef unsigned long       uint64_t;
             typedef long                int64_t;
             typedef unsigned int        uint32_t;
             typedef int                 int32_t;
             typedef unsigned short      uint16_t;
             typedef short               int16_t;
             typedef unsigned char       uint8_t;
             typedef char                int8_t;
         #else
 
             // Assume P64 programming model from C_FLAGS for integer width declarations 
 
             typedef unsigned long long  uint64_t;
             typedef long long           int64_t;
             typedef unsigned int        uint32_t;
             typedef int                 int32_t;
             typedef unsigned short      uint16_t;
             typedef short               int16_t;
             typedef unsigned char       uint8_t;
             typedef char                int8_t;
         #endif
     #endif
 #endif
 #endif	/* __FreeBSD__ */
 
 //
 // Basic EFI types of various widths
 //
 
 #ifndef ACPI_THREAD_ID		/* ACPI's definitions are fine, use those */
 #define ACPI_USE_SYSTEM_INTTYPES 1	/* Tell ACPI we've defined types */
 
 typedef uint64_t   UINT64;
 typedef int64_t    INT64;
 
 #ifndef _BASETSD_H_
     typedef uint32_t   UINT32;
     typedef int32_t    INT32;
 #endif
 
 typedef uint16_t   UINT16;
 typedef int16_t    INT16;
 typedef uint8_t    UINT8;
 typedef int8_t     INT8;
 
 #endif
 
 #undef VOID
 #define VOID    void
 
 
 typedef int32_t    INTN;
 typedef uint32_t   UINTN;
 
 #ifdef EFI_NT_EMULATOR
     #define POST_CODE(_Data)
 #else    
     #ifdef EFI_DEBUG
 #define POST_CODE(_Data)    __asm mov eax,(_Data) __asm out 0x80,al
     #else
         #define POST_CODE(_Data)
     #endif  
 #endif
 
 #define EFIERR(a)           (0x80000000 | a)
 #define EFI_ERROR_MASK      0x80000000
 #define EFIERR_OEM(a)       (0xc0000000 | a)      
 
 
 #define BAD_POINTER         0xFBFBFBFB
 #define MAX_ADDRESS         0xFFFFFFFF
 
 #define BREAKPOINT()        __asm { int 3 }
 
 //
 // Pointers must be aligned to these address to function
 //
 
 #define MIN_ALIGNMENT_SIZE  4
 
 #define ALIGN_VARIABLE(Value ,Adjustment) \
             (UINTN)Adjustment = 0; \
             if((UINTN)Value % MIN_ALIGNMENT_SIZE) \
                 (UINTN)Adjustment = MIN_ALIGNMENT_SIZE - ((UINTN)Value % MIN_ALIGNMENT_SIZE); \
             Value = (UINTN)Value + (UINTN)Adjustment
 
 
 //
 // Define macros to build data structure signatures from characters.
 //
 
 #define EFI_SIGNATURE_16(A,B)             ((A) | (B<<8))
 #define EFI_SIGNATURE_32(A,B,C,D)         (EFI_SIGNATURE_16(A,B)     | (EFI_SIGNATURE_16(C,D)     << 16))
 #define EFI_SIGNATURE_64(A,B,C,D,E,F,G,H) (EFI_SIGNATURE_32(A,B,C,D) | ((UINT64)(EFI_SIGNATURE_32(E,F,G,H)) << 32))
 
 //
 // EFIAPI - prototype calling convention for EFI function pointers
 // BOOTSERVICE - prototype for implementation of a boot service interface
 // RUNTIMESERVICE - prototype for implementation of a runtime service interface
 // RUNTIMEFUNCTION - prototype for implementation of a runtime function that is not a service
 // RUNTIME_CODE - pragma macro for declaring runtime code    
 //
 
 #ifndef EFIAPI                  // Forces EFI calling conventions reguardless of compiler options 
-    #if _MSC_EXTENSIONS
+    #ifdef _MSC_EXTENSIONS
         #define EFIAPI __cdecl  // Force C calling convention for Microsoft C compiler 
     #else
         #define EFIAPI          // Substitute expresion to force C calling convention 
     #endif
 #endif
 
 #define BOOTSERVICE
 //#define RUNTIMESERVICE(proto,a)    alloc_text("rtcode",a); proto a
 //#define RUNTIMEFUNCTION(proto,a)   alloc_text("rtcode",a); proto a
 #define RUNTIMESERVICE
 #define RUNTIMEFUNCTION
 
 
 #define RUNTIME_CODE(a)         alloc_text("rtcode", a)
 #define BEGIN_RUNTIME_DATA()    data_seg("rtdata")
 #define END_RUNTIME_DATA()      data_seg()
 
 #define VOLATILE    volatile
 
 #define MEMORY_FENCE()    
 
 #ifdef EFI_NO_INTERFACE_DECL
   #define EFI_FORWARD_DECLARATION(x)
   #define EFI_INTERFACE_DECL(x)
 #else
   #define EFI_FORWARD_DECLARATION(x) typedef struct _##x x
   #define EFI_INTERFACE_DECL(x) typedef struct x
 #endif
 
 #ifdef EFI_NT_EMULATOR
 
 //
 // To help ensure proper coding of integrated drivers, they are
 // compiled as DLLs.  In NT they require a dll init entry pointer.
 // The macro puts a stub entry point into the DLL so it will load.
 //
 
 #define EFI_DRIVER_ENTRY_POINT(InitFunction)            \
     EFI_STATUS                                          \
     InitFunction (                                      \
       EFI_HANDLE  ImageHandle,                          \
       EFI_SYSTEM_TABLE  *SystemTable                    \
       );                                                \
                                                         \
     UINTN                                               \
     __stdcall                                           \
     _DllMainCRTStartup (                                \
         UINTN    Inst,                                  \
         UINTN    reason_for_call,                       \
         VOID    *rserved                                \
         )                                               \
     {                                                   \
         return 1;                                       \
     }                                                   \
                                                         \
     int                                                 \
     __declspec( dllexport )                             \
     __cdecl                                             \
     InitializeDriver (                                  \
         void *ImageHandle,                              \
         void *SystemTable                               \
         )                                               \
     {                                                   \
         return InitFunction(ImageHandle, SystemTable);  \
     }
 
 
     #define LOAD_INTERNAL_DRIVER(_if, type, name, entry)      \
         (_if)->LoadInternal(type, name, NULL)             
 
 #else // EFI_NT_EMULATOR 
 
 //
 // When build similiar to FW, then link everything together as
 // one big module.
 //
 
     #define EFI_DRIVER_ENTRY_POINT(InitFunction)
 
     #define LOAD_INTERNAL_DRIVER(_if, type, name, entry)    \
             (_if)->LoadInternal(type, name, entry)
 
 #endif // EFI_FW_NT 
 
 #ifdef __FreeBSD__
 #define INTERFACE_DECL(x) struct x
 #else
 //
 // Some compilers don't support the forward reference construct:
 //  typedef struct XXXXX
 //
 // The following macro provide a workaround for such cases.
 //
 #ifdef NO_INTERFACE_DECL
 #define INTERFACE_DECL(x)
 #else
 #define INTERFACE_DECL(x) typedef struct x
 #endif
 #endif	/* __FreeBSD__ */
 
-#if _MSC_EXTENSIONS
+#ifdef _MSC_EXTENSIONS
 #pragma warning ( disable : 4731 )  // Suppress warnings about modification of EBP
 #endif
 
Index: projects/release-pkg/sys/boot/efi/libefi/Makefile
===================================================================
--- projects/release-pkg/sys/boot/efi/libefi/Makefile	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/libefi/Makefile	(revision 293336)
@@ -1,25 +1,26 @@
 # $FreeBSD$
 
 LIB=	efi
 INTERNALLIB=
 
 SRCS=	delay.c efi_console.c efinet.c efipart.c errno.c handles.c \
 	libefi.c time.c
 
 .if ${MACHINE_CPUARCH} == "aarch64"
 CFLAGS+=	-msoft-float -mgeneral-regs-only
 .endif
 .if ${MACHINE_ARCH} == "amd64"
 CFLAGS+= -fPIC -mno-red-zone
 .endif
 CFLAGS+= -I${.CURDIR}/../include
 CFLAGS+= -I${.CURDIR}/../include/${MACHINE}
 CFLAGS+= -I${.CURDIR}/../../../../lib/libstand
 
 # Pick up the bootstrap header for some interface items
 CFLAGS+= -I${.CURDIR}/../../common
 
 # Handle FreeBSD specific %b and %D printf format specifiers
 CFLAGS+= ${FORMAT_EXTENSIONS}
+CFLAGS+= -DTERM_EMU
 
 .include <bsd.lib.mk>
Index: projects/release-pkg/sys/boot/efi/libefi/efi_console.c
===================================================================
--- projects/release-pkg/sys/boot/efi/libefi/efi_console.c	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/libefi/efi_console.c	(revision 293336)
@@ -1,99 +1,483 @@
 /*-
  * Copyright (c) 2000 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <efi.h>
 #include <efilib.h>
 
 #include "bootstrap.h"
 
 static SIMPLE_TEXT_OUTPUT_INTERFACE	*conout;
 static SIMPLE_INPUT_INTERFACE		*conin;
 
+/* Default colors; efi_cons_init() uses them even without TERM_EMU. */
+#define	DEFAULT_FGCOLOR	EFI_LIGHTGRAY
+#define	DEFAULT_BGCOLOR	EFI_BLACK
+#ifdef TERM_EMU
+#define	MAXARGS	8
+static int args[MAXARGS], argc;
+static int fg_c, bg_c, curx, cury;
+static int esc;
+
+void get_pos(int *x, int *y);
+void curs_move(int *_x, int *_y, int x, int y);
+static void CL(int);
+#endif
+
+static void efi_cons_probe(struct console *);
+static int efi_cons_init(int);
+void efi_cons_putchar(int);
+int efi_cons_getchar(void);
+void efi_cons_efiputchar(int);
+int efi_cons_poll(void);
+
+struct console efi_console = {
+	"efi",
+	"EFI console",
+	0,
+	efi_cons_probe,
+	efi_cons_init,
+	efi_cons_putchar,
+	efi_cons_getchar,
+	efi_cons_poll
+};
+
+#ifdef TERM_EMU
+
+/* Get cursor position. */
+void
+get_pos(int *x, int *y)
+{
+	*x = conout->Mode->CursorColumn;
+	*y = conout->Mode->CursorRow;
+}
+
+/* Move cursor to column x, row y (0-based). */
+void
+curs_move(int *_x, int *_y, int x, int y)
+{
+	conout->SetCursorPosition(conout, x, y);
+	if (_x != NULL)
+		*_x = conout->Mode->CursorColumn;
+	if (_y != NULL)
+		*_y = conout->Mode->CursorRow;
+}
+
+/* Clear internal state of the terminal emulation code. */
+void
+end_term(void)
+{
+	esc = 0;
+	argc = -1;
+}
+
+#endif
+
 static void
 efi_cons_probe(struct console *cp)
 {
 	conout = ST->ConOut;
 	conin = ST->ConIn;
 	cp->c_flags |= C_PRESENTIN | C_PRESENTOUT;
 }
 
 static int
 efi_cons_init(int arg)
 {
-	conout->SetAttribute(conout, EFI_TEXT_ATTR(EFI_LIGHTGRAY, EFI_BLACK));
+	conout->SetAttribute(conout, EFI_TEXT_ATTR(DEFAULT_FGCOLOR,
+	    DEFAULT_BGCOLOR));
+#ifdef TERM_EMU
+	end_term();
+	get_pos(&curx, &cury);
+	curs_move(&curx, &cury, curx, cury);
+	fg_c = DEFAULT_FGCOLOR;
+	bg_c = DEFAULT_BGCOLOR;
+#endif
+	conout->EnableCursor(conout, TRUE);
 	return 0;
 }
 
+static void
+efi_cons_rawputchar(int c)
+{
+	int i;
+	UINTN x, y;		/* columns and rows of the current text mode */
+	conout->QueryMode(conout, conout->Mode->Mode, &x, &y);
+
+	if (c == '\t')
+		/* XXX lame tab expansion */
+		for (i = 0; i < 8; i++)
+			efi_cons_rawputchar(' ');
+	else {
+#ifndef	TERM_EMU
+		if (c == '\n')
+			efi_cons_efiputchar('\r');
+		/* Always emit c itself; a newline needs both CR and LF. */
+		efi_cons_efiputchar(c);
+#else
+		switch (c) {
+		case '\r':
+			curx = 0;
+			curs_move(&curx, &cury, curx, cury);
+			return;
+		case '\n':
+			cury++;
+			if (cury >= y) {
+				efi_cons_efiputchar('\n');
+				cury--;
+			} else
+				curs_move(&curx, &cury, curx, cury);
+			return;
+		case '\b':
+			if (curx > 0) {
+				curx--;
+				curs_move(&curx, &cury, curx, cury);
+			}
+			return;
+		default:
+			efi_cons_efiputchar(c);
+			curx++;
+			if (curx > x-1) {
+				curx = 0;
+				cury++;
+			}
+			if (cury > y-1) {
+				curx = 0;
+				cury--;
+			}
+		}
+		curs_move(&curx, &cury, curx, cury);
+#endif
+	}
+}
+
+/* Gracefully exit ESC-sequence processing in case of misunderstanding. */
+static void
+bail_out(int c)
+{
+	char buf[16], *ch;
+	int i;
+
+	if (esc) {
+		efi_cons_rawputchar('\033');
+		if (esc != '\033')
+			efi_cons_rawputchar(esc);
+		for (i = 0; i <= argc; ++i) {
+			sprintf(buf, "%d", args[i]);
+			ch = buf;
+			while (*ch)
+				efi_cons_rawputchar(*ch++);
+		}
+	}
+	efi_cons_rawputchar(c);
+	end_term();
+}
+
+/* Clear display from current position to end of screen. */
+static void
+CD(void) {
+	int i;
+	UINTN x, y;
+
+	get_pos(&curx, &cury);
+	if (curx == 0 && cury == 0) {
+		conout->ClearScreen(conout);
+		end_term();
+		return;
+	}
+
+	conout->QueryMode(conout, conout->Mode->Mode, &x, &y);
+	CL(0);  /* clear current line from cursor to end */
+	for (i = cury + 1; i < y-1; i++) {
+		curs_move(NULL, NULL, 0, i);
+		CL(0);
+	}
+	curs_move(NULL, NULL, curx, cury);
+	end_term();
+}
+
+/*
+ * Absolute cursor move to args[0] rows and args[1] columns
+ * (the coordinates are 1-based).
+ */
+static void
+CM(void)
+{
+	if (args[0] > 0)
+		args[0]--;
+	if (args[1] > 0)
+		args[1]--;
+	curs_move(&curx, &cury, args[1], args[0]);
+	end_term();
+}
+
+/* Home cursor (left top corner), also called from mode command. */
 void
-efi_cons_putchar(int c)
+HO(void)
 {
-	CHAR16 buf[2];
+	argc = 1;
+	args[0] = args[1] = 1;
+	CM();
+}
 
-	if (c == '\n')
-		efi_cons_putchar('\r');
+/* Clear line: 0 = cursor to end, 1 = start to cursor, 2 = entire line. */
+static void
+CL(int direction)
+{
+	int i, len;
+	UINTN x, y;
+	CHAR16 *line;
 
-	buf[0] = c;
-	buf[1] = 0;
+	conout->QueryMode(conout, conout->Mode->Mode, &x, &y);
+	switch (direction) {
+	case 0:         /* from cursor to end */
+		len = x - curx + 1;
+		break;
+	case 1:         /* from beginning to cursor */
+		len = curx;
+		break;
+	case 2:         /* entire line */
+		len = x;
+		break;
+	}
 
-	conout->OutputString(conout, buf);
+	if (cury == y - 1)
+		len--;
+
+	line = malloc(len * sizeof (CHAR16));
+	if (line == NULL) {
+		printf("out of memory\n");
+		return;
+	}
+	for (i = 0; i < len; i++)
+		line[i] = ' ';
+	line[len-1] = 0;
+
+	if (direction != 0)
+		curs_move(NULL, NULL, 0, cury);
+
+	conout->OutputString(conout, line);
+	/* restore cursor position */
+	curs_move(NULL, NULL, curx, cury);
+	free(line);
+	end_term();
 }
 
+static void
+get_arg(int c)
+{
+	if (argc < 0)
+		argc = 0;
+	args[argc] *= 10;
+	args[argc] += c - '0';
+}
+
+/* Emulate basic capabilities of cons25 terminal */
+static void
+efi_term_emu(int c)
+{
+	static int ansi_col[] = {
+		0, 4, 2, 6, 1, 5, 3, 7
+	};
+	int t, i;
+
+	switch (esc) {
+	case 0:
+		switch (c) {
+		case '\033':
+			esc = c;
+			break;
+		default:
+			efi_cons_rawputchar(c);
+			break;
+		}
+		break;
+	case '\033':
+		switch (c) {
+		case '[':
+			esc = c;
+			args[0] = 0;
+			argc = -1;
+			break;
+		default:
+			bail_out(c);
+			break;
+		}
+		break;
+	case '[':
+		switch (c) {
+		case ';':
+			if (argc < 0)
+				argc = 0;
+			else if (argc + 1 >= MAXARGS)
+				bail_out(c);
+			else
+				args[++argc] = 0;
+			break;
+		case 'H':               /* ho = \E[H */
+			if (argc < 0)
+				HO();
+			else if (argc == 1)
+				CM();
+			else
+				bail_out(c);
+			break;
+		case 'J':               /* cd = \E[J */
+			if (argc < 0)
+				CD();
+			else
+				bail_out(c);
+			break;
+		case 'm':
+			if (argc < 0) {
+				fg_c = DEFAULT_FGCOLOR;
+				bg_c = DEFAULT_BGCOLOR;
+			}
+			for (i = 0; i <= argc; ++i) {
+				switch (args[i]) {
+				case 0:         /* back to normal */
+					fg_c = DEFAULT_FGCOLOR;
+					bg_c = DEFAULT_BGCOLOR;
+					break;
+				case 1:         /* bold */
+					fg_c |= 0x8;
+					break;
+				case 4:         /* underline */
+				case 5:         /* blink */
+					bg_c |= 0x8;
+					break;
+				case 7:         /* reverse */
+					t = fg_c;
+					fg_c = bg_c;
+					bg_c = t;
+					break;
+				case 30: case 31: case 32: case 33:
+				case 34: case 35: case 36: case 37:
+					fg_c = ansi_col[args[i] - 30];
+					break;
+				case 39:        /* normal */
+					fg_c = DEFAULT_FGCOLOR;
+					break;
+				case 40: case 41: case 42: case 43:
+				case 44: case 45: case 46: case 47:
+					bg_c = ansi_col[args[i] - 40];
+					break;
+				case 49:        /* normal */
+					bg_c = DEFAULT_BGCOLOR;
+					break;
+				}
+			}
+			conout->SetAttribute(conout, EFI_TEXT_ATTR(fg_c, bg_c));
+			end_term();
+			break;
+		default:
+			if (isdigit(c))
+				get_arg(c);
+			else
+				bail_out(c);
+			break;
+		}
+		break;
+	default:
+		bail_out(c);
+		break;
+	}
+}
+
+void
+efi_cons_putchar(int c)
+{
+#ifdef TERM_EMU
+	efi_term_emu(c);
+#else
+	efi_cons_rawputchar(c);
+#endif
+}
+
 int
 efi_cons_getchar()
 {
 	EFI_INPUT_KEY key;
 	EFI_STATUS status;
 	UINTN junk;
 
 	/* Try to read a key stroke. We wait for one if none is pending. */
 	status = conin->ReadKeyStroke(conin, &key);
 	if (status == EFI_NOT_READY) {
 		BS->WaitForEvent(1, &conin->WaitForKey, &junk);
 		status = conin->ReadKeyStroke(conin, &key);
 	}
+	switch (key.ScanCode) {
+	case 0x17: /* ESC */
+		return (0x1b);  /* esc */
+	}
+
+	/* Otherwise return the key's Unicode character. */
 	return (key.UnicodeChar);
 }
 
 int
 efi_cons_poll()
 {
 	/* This can clear the signaled state. */
 	return (BS->CheckEvent(conin->WaitForKey) == EFI_SUCCESS);
 }
 
-struct console efi_console = {
-	"efi",
-	"EFI console",
-	0,
-	efi_cons_probe,
-	efi_cons_init,
-	efi_cons_putchar,
-	efi_cons_getchar,
-	efi_cons_poll
-};
+/* Plain direct access to EFI OutputString(). */
+void
+efi_cons_efiputchar(int c)
+{
+	CHAR16 buf[2];
+
+	/*
+	 * translate box chars to unicode
+	 */
+	switch (c) {
+	/* single frame */
+	case 0xb3: buf[0] = BOXDRAW_VERTICAL; break;
+	case 0xbf: buf[0] = BOXDRAW_DOWN_LEFT; break;
+	case 0xc0: buf[0] = BOXDRAW_UP_RIGHT; break;
+	case 0xc4: buf[0] = BOXDRAW_HORIZONTAL; break;
+	case 0xda: buf[0] = BOXDRAW_DOWN_RIGHT; break;
+	case 0xd9: buf[0] = BOXDRAW_UP_LEFT; break;
+
+	/* double frame */
+	case 0xba: buf[0] = BOXDRAW_DOUBLE_VERTICAL; break;
+	case 0xbb: buf[0] = BOXDRAW_DOUBLE_DOWN_LEFT; break;
+	case 0xbc: buf[0] = BOXDRAW_DOUBLE_UP_LEFT; break;
+	case 0xc8: buf[0] = BOXDRAW_DOUBLE_UP_RIGHT; break;
+	case 0xc9: buf[0] = BOXDRAW_DOUBLE_DOWN_RIGHT; break;
+	case 0xcd: buf[0] = BOXDRAW_DOUBLE_HORIZONTAL; break;
+
+	default:
+		buf[0] = c;
+	}
+        buf[1] = 0;     /* terminate string */
+
+	conout->OutputString(conout, buf);
+}
Index: projects/release-pkg/sys/boot/efi/loader/arch/amd64/framebuffer.c
===================================================================
--- projects/release-pkg/sys/boot/efi/loader/arch/amd64/framebuffer.c	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/loader/arch/amd64/framebuffer.c	(revision 293336)
@@ -1,564 +1,564 @@
 /*-
  * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Benno Rice under sponsorship from
  * the FreeBSD Foundation.
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <bootstrap.h>
 #include <sys/endian.h>
 #include <stand.h>
 
 #include <efi.h>
 #include <efilib.h>
 #include <efiuga.h>
 #include <efipciio.h>
 #include <machine/metadata.h>
 
 static EFI_GUID gop_guid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
 static EFI_GUID pciio_guid = EFI_PCI_IO_PROTOCOL_GUID;
 static EFI_GUID uga_guid = EFI_UGA_DRAW_PROTOCOL_GUID;
 
 static u_int
 efifb_color_depth(struct efi_fb *efifb)
 {
 	uint32_t mask;
 	u_int depth;
 
 	mask = efifb->fb_mask_red | efifb->fb_mask_green |
 	    efifb->fb_mask_blue | efifb->fb_mask_reserved;
 	if (mask == 0)
 		return (0);
 	for (depth = 1; mask != 1; depth++)
 		mask >>= 1;
 	return (depth);
 }
 
 static int
 efifb_mask_from_pixfmt(struct efi_fb *efifb, EFI_GRAPHICS_PIXEL_FORMAT pixfmt,
     EFI_PIXEL_BITMASK *pixinfo)
 {
 	int result;
 
 	result = 0;
 	switch (pixfmt) {
 	case PixelRedGreenBlueReserved8BitPerColor:
 		efifb->fb_mask_red = 0x000000ff;
 		efifb->fb_mask_green = 0x0000ff00;
 		efifb->fb_mask_blue = 0x00ff0000;
 		efifb->fb_mask_reserved = 0xff000000;
 		break;
 	case PixelBlueGreenRedReserved8BitPerColor:
 		efifb->fb_mask_red = 0x00ff0000;
 		efifb->fb_mask_green = 0x0000ff00;
 		efifb->fb_mask_blue = 0x000000ff;
 		efifb->fb_mask_reserved = 0xff000000;
 		break;
 	case PixelBitMask:
 		efifb->fb_mask_red = pixinfo->RedMask;
 		efifb->fb_mask_green = pixinfo->GreenMask;
 		efifb->fb_mask_blue = pixinfo->BlueMask;
 		efifb->fb_mask_reserved = pixinfo->ReservedMask;
 		break;
 	default:
 		result = 1;
 		break;
 	}
 	return (result);
 }
 
 static int
 efifb_from_gop(struct efi_fb *efifb, EFI_GRAPHICS_OUTPUT_PROTOCOL_MODE *mode,
     EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *info)
 {
 	int result;
 
 	efifb->fb_addr = mode->FrameBufferBase;
 	efifb->fb_size = mode->FrameBufferSize;
 	efifb->fb_height = info->VerticalResolution;
 	efifb->fb_width = info->HorizontalResolution;
 	efifb->fb_stride = info->PixelsPerScanLine;
 	result = efifb_mask_from_pixfmt(efifb, info->PixelFormat,
 	    &info->PixelInformation);
 	return (result);
 }
 
 static ssize_t
 efifb_uga_find_pixel(EFI_UGA_DRAW_PROTOCOL *uga, u_int line,
     EFI_PCI_IO_PROTOCOL *pciio, uint64_t addr, uint64_t size)
 {
 	EFI_UGA_PIXEL pix0, pix1;
 	uint8_t *data1, *data2;
 	size_t count, maxcount = 1024;
 	ssize_t ofs;
 	EFI_STATUS status;
 	u_int idx;
 
 	status = uga->Blt(uga, &pix0, EfiUgaVideoToBltBuffer,
 	    0, line, 0, 0, 1, 1, 0);
 	if (EFI_ERROR(status)) {
 		printf("UGA BLT operation failed (video->buffer)");
 		return (-1);
 	}
 	pix1.Red = ~pix0.Red;
 	pix1.Green = ~pix0.Green;
 	pix1.Blue = ~pix0.Blue;
 	pix1.Reserved = 0;
 
 	data1 = calloc(maxcount, 2);
 	if (data1 == NULL) {
 		printf("Unable to allocate memory");
 		return (-1);
 	}
 	data2 = data1 + maxcount;
 
 	ofs = 0;
 	while (size > 0) {
 		count = min(size, maxcount);
 
 		status = pciio->Mem.Read(pciio, EfiPciIoWidthUint32,
 		    EFI_PCI_IO_PASS_THROUGH_BAR, addr + ofs, count >> 2,
 		    data1);
 		if (EFI_ERROR(status)) {
 			printf("Error reading frame buffer (before)");
 			goto fail;
 		}
 		status = uga->Blt(uga, &pix1, EfiUgaBltBufferToVideo,
 		    0, 0, 0, line, 1, 1, 0);
 		if (EFI_ERROR(status)) {
 			printf("UGA BLT operation failed (modify)");
 			goto fail;
 		}
 		status = pciio->Mem.Read(pciio, EfiPciIoWidthUint32,
 		    EFI_PCI_IO_PASS_THROUGH_BAR, addr + ofs, count >> 2,
 		    data2);
 		if (EFI_ERROR(status)) {
 			printf("Error reading frame buffer (after)");
 			goto fail;
 		}
 		status = uga->Blt(uga, &pix0, EfiUgaBltBufferToVideo,
 		    0, 0, 0, line, 1, 1, 0);
 		if (EFI_ERROR(status)) {
 			printf("UGA BLT operation failed (restore)");
 			goto fail;
 		}
 		for (idx = 0; idx < count; idx++) {
 			if (data1[idx] != data2[idx]) {
 				free(data1);
 				return (ofs + (idx & ~3));
 			}
 		}
 		ofs += count;
 		size -= count;
 	}
 	printf("No change detected in frame buffer");
 
  fail:
-	printf(" -- error %lu\n", status & ~EFI_ERROR_MASK);
+	printf(" -- error %lu\n", EFI_ERROR_CODE(status));
 	free(data1);
 	return (-1);
 }
 
 static EFI_PCI_IO_PROTOCOL *
 efifb_uga_get_pciio(void)
 {
 	EFI_PCI_IO_PROTOCOL *pciio;
 	EFI_HANDLE *buf, *hp;
 	EFI_STATUS status;
 	UINTN bufsz;
 
 	/* Get all handles that support the UGA protocol. */
 	bufsz = 0;
 	status = BS->LocateHandle(ByProtocol, &uga_guid, NULL, &bufsz, NULL);
 	if (status != EFI_BUFFER_TOO_SMALL)
 		return (NULL);
 	buf = malloc(bufsz);
 	status = BS->LocateHandle(ByProtocol, &uga_guid, NULL, &bufsz, buf);
 	if (status != EFI_SUCCESS) {
 		free(buf);
 		return (NULL);
 	}
 	bufsz /= sizeof(EFI_HANDLE);
 
 	/* Get the PCI I/O interface of the first handle that supports it. */
 	pciio = NULL;
 	for (hp = buf; hp < buf + bufsz; hp++) {
 		status = BS->HandleProtocol(*hp, &pciio_guid, (void **)&pciio);
 		if (status == EFI_SUCCESS) {
 			free(buf);
 			return (pciio);
 		}
 	}
 	free(buf);
 	return (NULL);
 }
 
 static EFI_STATUS
 efifb_uga_locate_framebuffer(EFI_PCI_IO_PROTOCOL *pciio, uint64_t *addrp,
     uint64_t *sizep)
 {
 	uint8_t *resattr;
 	uint64_t addr, size;
 	EFI_STATUS status;
 	u_int bar;
 
 	if (pciio == NULL)
 		return (EFI_DEVICE_ERROR);
 
 	/* Attempt to get the frame buffer address (imprecise). */
 	*addrp = 0;
 	*sizep = 0;
 	for (bar = 0; bar < 6; bar++) {
 		status = pciio->GetBarAttributes(pciio, bar, NULL,
 		    (void **)&resattr);
 		if (status != EFI_SUCCESS)
 			continue;
 		/* XXX magic offsets and constants. */
 		if (resattr[0] == 0x87 && resattr[3] == 0) {
 			/* 32-bit address space descriptor (MEMIO) */
 			addr = le32dec(resattr + 10);
 			size = le32dec(resattr + 22);
 		} else if (resattr[0] == 0x8a && resattr[3] == 0) {
 			/* 64-bit address space descriptor (MEMIO) */
 			addr = le64dec(resattr + 14);
 			size = le64dec(resattr + 38);
 		} else {
 			addr = 0;
 			size = 0;
 		}
 		BS->FreePool(resattr);
 		if (addr == 0 || size == 0)
 			continue;
 
 		/* We assume the largest BAR is the frame buffer. */
 		if (size > *sizep) {
 			*addrp = addr;
 			*sizep = size;
 		}
 	}
 	return ((*addrp == 0 || *sizep == 0) ? EFI_DEVICE_ERROR : 0);
 }
 
 static int
 efifb_from_uga(struct efi_fb *efifb, EFI_UGA_DRAW_PROTOCOL *uga)
 {
 	EFI_PCI_IO_PROTOCOL *pciio;
 	char *ev, *p;
 	EFI_STATUS status;
 	ssize_t offset;
 	uint64_t fbaddr, fbsize;
 	uint32_t horiz, vert, stride;
 	uint32_t np, depth, refresh;
 
 	status = uga->GetMode(uga, &horiz, &vert, &depth, &refresh);
 	if (EFI_ERROR(status))
 		return (1);
 	efifb->fb_height = vert;
 	efifb->fb_width = horiz;
 	/* Paranoia... */
 	if (efifb->fb_height == 0 || efifb->fb_width == 0)
 		return (1);
 
 	/* The color masks are fixed AFAICT. */
 	efifb_mask_from_pixfmt(efifb, PixelBlueGreenRedReserved8BitPerColor,
 	    NULL);
 
 	/* pciio can be NULL on return! */
 	pciio = efifb_uga_get_pciio();
 
 	/* Try to find the frame buffer. */
 	status = efifb_uga_locate_framebuffer(pciio, &efifb->fb_addr,
 	    &efifb->fb_size);
 	if (EFI_ERROR(status)) {
 		efifb->fb_addr = 0;
 		efifb->fb_size = 0;
 	}
 
 	/*
 	 * There's no reliable way to detect the frame buffer or the
 	 * offset within the frame buffer of the visible region, nor
 	 * the stride. Our only option is to look at the system and
 	 * fill in the blanks based on that. Luckily, UGA was mostly
 	 * only used on Apple hardware.
 	 */
 	offset = -1;
 	ev = getenv("smbios.system.maker");
 	if (ev != NULL && !strcmp(ev, "Apple Inc.")) {
 		ev = getenv("smbios.system.product");
 		if (ev != NULL && !strcmp(ev, "iMac7,1")) {
 			/* These are the expected values we should have. */
 			horiz = 1680;
 			vert = 1050;
 			fbaddr = 0xc0000000;
 			/* These are the missing bits. */
 			offset = 0x10000;
 			stride = 1728;
 		} else if (ev != NULL && !strcmp(ev, "MacBook3,1")) {
 			/* These are the expected values we should have. */
 			horiz = 1280;
 			vert = 800;
 			fbaddr = 0xc0000000;
 			/* These are the missing bits. */
 			offset = 0x0;
 			stride = 2048;
 		}
 	}
 
 	/*
 	 * If this is hardware we know, make sure that it looks familiar
 	 * before we accept our hardcoded values.
 	 */
 	if (offset >= 0 && efifb->fb_width == horiz &&
 	    efifb->fb_height == vert && efifb->fb_addr == fbaddr) {
 		efifb->fb_addr += offset;
 		efifb->fb_size -= offset;
 		efifb->fb_stride = stride;
 		return (0);
 	} else if (offset >= 0) {
 		printf("Hardware make/model known, but graphics not "
 		    "as expected.\n");
 		printf("Console may not work!\n");
 	}
 
 	/*
 	 * The stride is equal or larger to the width. Often it's the
 	 * next larger power of two. We'll start with that...
 	 */
 	efifb->fb_stride = efifb->fb_width;
 	do {
 		np = efifb->fb_stride & (efifb->fb_stride - 1);
 		if (np) {
 			efifb->fb_stride |= (np - 1);
 			efifb->fb_stride++;
 		}
 	} while (np);
 
 	ev = getenv("hw.efifb.address");
 	if (ev == NULL) {
 		if (efifb->fb_addr == 0) {
 			printf("Please set hw.efifb.address and "
 			    "hw.efifb.stride.\n");
 			return (1);
 		}
 
 		/*
 		 * The visible part of the frame buffer may not start at
 		 * offset 0, so try to detect it. Note that we may not
 		 * always be able to read from the frame buffer, which
 		 * means that we may not be able to detect anything. In
 		 * that case, we would take a long time scanning for a
 		 * pixel change in the frame buffer, which would have it
 		 * appear that we're hanging, so we limit the scan to
 		 * 1/256th of the frame buffer. This number is mostly
 		 * based on PR 202730 and the fact that on a MacBoook,
 		 * where we can't read from the frame buffer the offset
 		 * of the visible region is 0. In short: we want to scan
 		 * enough to handle all adapters that have an offset
 		 * larger than 0 and we want to scan as little as we can
 		 * to not appear to hang when we can't read from the
 		 * frame buffer.
 		 */
 		offset = efifb_uga_find_pixel(uga, 0, pciio, efifb->fb_addr,
 		    efifb->fb_size >> 8);
 		if (offset == -1) {
 			printf("Unable to reliably detect frame buffer.\n");
 		} else if (offset > 0) {
 			efifb->fb_addr += offset;
 			efifb->fb_size -= offset;
 		}
 	} else {
 		offset = 0;
 		efifb->fb_size = efifb->fb_height * efifb->fb_stride * 4;
 		efifb->fb_addr = strtoul(ev, &p, 0);
 		if (*p != '\0')
 			return (1);
 	}
 
 	ev = getenv("hw.efifb.stride");
 	if (ev == NULL) {
 		if (pciio != NULL && offset != -1) {
 			/* Determine the stride. */
 			offset = efifb_uga_find_pixel(uga, 1, pciio,
 			    efifb->fb_addr, horiz * 8);
 			if (offset != -1)
 				efifb->fb_stride = offset >> 2;
 		} else {
 			printf("Unable to reliably detect the stride.\n");
 		}
 	} else {
 		efifb->fb_stride = strtoul(ev, &p, 0);
 		if (*p != '\0')
 			return (1);
 	}
 
 	/*
 	 * We finalized on the stride, so recalculate the size of the
 	 * frame buffer.
 	 */
 	efifb->fb_size = efifb->fb_height * efifb->fb_stride * 4;
 	return (0);
 }
 
 int
 efi_find_framebuffer(struct efi_fb *efifb)
 {
 	EFI_GRAPHICS_OUTPUT *gop;
 	EFI_UGA_DRAW_PROTOCOL *uga;
 	EFI_STATUS status;
 
 	status = BS->LocateProtocol(&gop_guid, NULL, (VOID **)&gop);
 	if (status == EFI_SUCCESS)
 		return (efifb_from_gop(efifb, gop->Mode, gop->Mode->Info));
 
 	status = BS->LocateProtocol(&uga_guid, NULL, (VOID **)&uga);
 	if (status == EFI_SUCCESS)
 		return (efifb_from_uga(efifb, uga));
 
 	return (1);
 }
 
 static void
 print_efifb(int mode, struct efi_fb *efifb, int verbose)
 {
 	u_int depth;
 
 	if (mode >= 0)
 		printf("mode %d: ", mode);
 	depth = efifb_color_depth(efifb);
 	printf("%ux%ux%u, stride=%u", efifb->fb_width, efifb->fb_height,
 	    depth, efifb->fb_stride);
 	if (verbose) {
 		printf("\n    frame buffer: address=%jx, size=%jx",
 		    (uintmax_t)efifb->fb_addr, (uintmax_t)efifb->fb_size);
 		printf("\n    color mask: R=%08x, G=%08x, B=%08x\n",
 		    efifb->fb_mask_red, efifb->fb_mask_green,
 		    efifb->fb_mask_blue);
 	}
 }
 
 COMMAND_SET(gop, "gop", "graphics output protocol", command_gop);
 
 static int
 command_gop(int argc, char *argv[])
 {
 	struct efi_fb efifb;
 	EFI_GRAPHICS_OUTPUT *gop;
 	EFI_STATUS status;
 	u_int mode;
 
 	status = BS->LocateProtocol(&gop_guid, NULL, (VOID **)&gop);
 	if (EFI_ERROR(status)) {
 		sprintf(command_errbuf, "%s: Graphics Output Protocol not "
-		    "present (error=%lu)", argv[0], status & ~EFI_ERROR_MASK);
+		    "present (error=%lu)", argv[0], EFI_ERROR_CODE(status));
 		return (CMD_ERROR);
 	}
 
 	if (argc < 2)
 		goto usage;
 
 	if (!strcmp(argv[1], "set")) {
 		char *cp;
 
 		if (argc != 3)
 			goto usage;
 		mode = strtol(argv[2], &cp, 0);
 		if (cp[0] != '\0') {
 			sprintf(command_errbuf, "mode is an integer");
 			return (CMD_ERROR);
 		}
 		status = gop->SetMode(gop, mode);
 		if (EFI_ERROR(status)) {
 			sprintf(command_errbuf, "%s: Unable to set mode to "
 			    "%u (error=%lu)", argv[0], mode,
-			    status & ~EFI_ERROR_MASK);
+			    EFI_ERROR_CODE(status));
 			return (CMD_ERROR);
 		}
 	} else if (!strcmp(argv[1], "get")) {
 		if (argc != 2)
 			goto usage;
 		efifb_from_gop(&efifb, gop->Mode, gop->Mode->Info);
 		print_efifb(gop->Mode->Mode, &efifb, 1);
 		printf("\n");
 	} else if (!strcmp(argv[1], "list")) {
 		EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *info;
 		UINTN infosz;
 
 		if (argc != 2)
 			goto usage;
 		pager_open();
 		for (mode = 0; mode < gop->Mode->MaxMode; mode++) {
 			status = gop->QueryMode(gop, mode, &infosz, &info);
 			if (EFI_ERROR(status))
 				continue;
 			efifb_from_gop(&efifb, gop->Mode, info);
 			print_efifb(mode, &efifb, 0);
 			if (pager_output("\n"))
 				break;
 		}
 		pager_close();
 	}
 	return (CMD_OK);
 
  usage:
 	sprintf(command_errbuf, "usage: %s [list | get | set <mode>]",
 	    argv[0]);
 	return (CMD_ERROR);
 }
 
 COMMAND_SET(uga, "uga", "universal graphics adapter", command_uga);
 
 static int
 command_uga(int argc, char *argv[])
 {
 	struct efi_fb efifb;
 	EFI_UGA_DRAW_PROTOCOL *uga;
 	EFI_STATUS status;
 
 	status = BS->LocateProtocol(&uga_guid, NULL, (VOID **)&uga);
 	if (EFI_ERROR(status)) {
 		sprintf(command_errbuf, "%s: UGA Protocol not present "
-		    "(error=%lu)", argv[0], status & ~EFI_ERROR_MASK);
+		    "(error=%lu)", argv[0], EFI_ERROR_CODE(status));
 		return (CMD_ERROR);
 	}
 
 	if (argc != 1)
 		goto usage;
 
 	if (efifb_from_uga(&efifb, uga) != CMD_OK) {
 		sprintf(command_errbuf, "%s: Unable to get UGA information",
 		    argv[0]);
 		return (CMD_ERROR);
 	}
 
 	print_efifb(-1, &efifb, 1);
 	printf("\n");
 	return (CMD_OK);
 
  usage:
 	sprintf(command_errbuf, "usage: %s", argv[0]);
 	return (CMD_ERROR);
 }
Index: projects/release-pkg/sys/boot/efi/loader/bootinfo.c
===================================================================
--- projects/release-pkg/sys/boot/efi/loader/bootinfo.c	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/loader/bootinfo.c	(revision 293336)
@@ -1,462 +1,461 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 2004, 2006 Marcel Moolenaar
  * Copyright (c) 2014 The FreeBSD Foundation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <string.h>
 #include <sys/param.h>
 #include <sys/reboot.h>
 #include <sys/linker.h>
 #include <sys/boot.h>
 #include <machine/cpufunc.h>
 #include <machine/elf.h>
 #include <machine/metadata.h>
 #include <machine/psl.h>
 
 #include <efi.h>
 #include <efilib.h>
 
 #include "bootstrap.h"
 #include "loader_efi.h"
 
 #if defined(__amd64__)
 #include <machine/specialreg.h>
 #include "framebuffer.h"
 #endif
 
 #if defined(LOADER_FDT_SUPPORT)
 #include <fdt_platform.h>
 #endif
 
 extern EFI_SYSTEM_TABLE	*ST;
 
 static const char howto_switches[] = "aCdrgDmphsv";
 static int howto_masks[] = {
 	RB_ASKNAME, RB_CDROM, RB_KDB, RB_DFLTROOT, RB_GDB, RB_MULTIPLE,
 	RB_MUTE, RB_PAUSE, RB_SERIAL, RB_SINGLE, RB_VERBOSE
 };
 
 static int
 bi_getboothowto(char *kargs)
 {
 	const char *sw;
 	char *opts;
 	char *console;
 	int howto, i;
 
 	howto = 0;
 
 	/* Get the boot options from the environment first. */
 	for (i = 0; howto_names[i].ev != NULL; i++) {
 		if (getenv(howto_names[i].ev) != NULL)
 			howto |= howto_names[i].mask;
 	}
 
 	console = getenv("console");
 	if (console != NULL) {
 		if (strcmp(console, "comconsole") == 0)
 			howto |= RB_SERIAL;
 		if (strcmp(console, "nullconsole") == 0)
 			howto |= RB_MUTE;
 	}
 
 	/* Parse kargs */
 	if (kargs == NULL)
 		return (howto);
 
 	opts = strchr(kargs, '-');
 	while (opts != NULL) {
 		while (*(++opts) != '\0') {
 			sw = strchr(howto_switches, *opts);
 			if (sw == NULL)
 				break;
 			howto |= howto_masks[sw - howto_switches];
 		}
 		opts = strchr(opts, '-');
 	}
 
 	return (howto);
 }
 
 /*
  * Copy the environment into the load area starting at (addr).
  * Each variable is formatted as <name>=<value>, with a single nul
  * separating each variable, and a double nul terminating the environment.
  */
 static vm_offset_t
 bi_copyenv(vm_offset_t start)
 {
 	struct env_var *ep;
 	vm_offset_t addr, last;
 	size_t len;
 
 	addr = last = start;
 
 	/* Traverse the environment. */
 	for (ep = environ; ep != NULL; ep = ep->ev_next) {
 		len = strlen(ep->ev_name);
 		if (archsw.arch_copyin(ep->ev_name, addr, len) != len)
 			break;
 		addr += len;
 		if (archsw.arch_copyin("=", addr, 1) != 1)
 			break;
 		addr++;
 		if (ep->ev_value != NULL) {
 			len = strlen(ep->ev_value);
 			if (archsw.arch_copyin(ep->ev_value, addr, len) != len)
 				break;
 			addr += len;
 		}
 		if (archsw.arch_copyin("", addr, 1) != 1)
 			break;
 		last = ++addr;
 	}
 
 	if (archsw.arch_copyin("", last++, 1) != 1)
 		last = start;
 	return(last);
 }
 
 /*
  * Copy module-related data into the load area, where it can be
  * used as a directory for loaded modules.
  *
  * Module data is presented in a self-describing format.  Each datum
  * is preceded by a 32-bit identifier and a 32-bit size field.
  *
  * Currently, the following data are saved:
  *
  * MOD_NAME	(variable)		module name (string)
  * MOD_TYPE	(variable)		module type (string)
  * MOD_ARGS	(variable)		module parameters (string)
  * MOD_ADDR	sizeof(vm_offset_t)	module load address
  * MOD_SIZE	sizeof(size_t)		module size
  * MOD_METADATA	(variable)		type-specific metadata
  */
 #define	COPY32(v, a, c) {					\
 	uint32_t x = (v);					\
 	if (c)							\
 		archsw.arch_copyin(&x, a, sizeof(x));		\
 	a += sizeof(x);						\
 }
 
 #define	MOD_STR(t, a, s, c) {					\
 	COPY32(t, a, c);					\
 	COPY32(strlen(s) + 1, a, c);				\
 	if (c)							\
 		archsw.arch_copyin(s, a, strlen(s) + 1);	\
 	a += roundup(strlen(s) + 1, sizeof(u_long));		\
 }
 
 #define	MOD_NAME(a, s, c)	MOD_STR(MODINFO_NAME, a, s, c)
 #define	MOD_TYPE(a, s, c)	MOD_STR(MODINFO_TYPE, a, s, c)
 #define	MOD_ARGS(a, s, c)	MOD_STR(MODINFO_ARGS, a, s, c)
 
 #define	MOD_VAR(t, a, s, c) {					\
 	COPY32(t, a, c);					\
 	COPY32(sizeof(s), a, c);				\
 	if (c)							\
 		archsw.arch_copyin(&s, a, sizeof(s));		\
 	a += roundup(sizeof(s), sizeof(u_long));		\
 }
 
 #define	MOD_ADDR(a, s, c)	MOD_VAR(MODINFO_ADDR, a, s, c)
 #define	MOD_SIZE(a, s, c)	MOD_VAR(MODINFO_SIZE, a, s, c)
 
 #define	MOD_METADATA(a, mm, c) {				\
 	COPY32(MODINFO_METADATA | mm->md_type, a, c);		\
 	COPY32(mm->md_size, a, c);				\
 	if (c)							\
 		archsw.arch_copyin(mm->md_data, a, mm->md_size);	\
 	a += roundup(mm->md_size, sizeof(u_long));		\
 }
 
 #define	MOD_END(a, c) {						\
 	COPY32(MODINFO_END, a, c);				\
 	COPY32(0, a, c);					\
 }
 
 static vm_offset_t
 bi_copymodules(vm_offset_t addr)
 {
 	struct preloaded_file *fp;
 	struct file_metadata *md;
 	int c;
 	uint64_t v;
 
 	c = addr != 0;
 	/* Start with the first module on the list, should be the kernel. */
 	for (fp = file_findfile(NULL, NULL); fp != NULL; fp = fp->f_next) {
 		MOD_NAME(addr, fp->f_name, c); /* This must come first. */
 		MOD_TYPE(addr, fp->f_type, c);
 		if (fp->f_args)
 			MOD_ARGS(addr, fp->f_args, c);
 		v = fp->f_addr;
 #if defined(__arm__)
 		v -= __elfN(relocation_offset);
 #endif
 		MOD_ADDR(addr, v, c);
 		v = fp->f_size;
 		MOD_SIZE(addr, v, c);
 		for (md = fp->f_metadata; md != NULL; md = md->md_next)
 			if (!(md->md_type & MODINFOMD_NOCOPY))
 				MOD_METADATA(addr, md, c);
 	}
 	MOD_END(addr, c);
 	return(addr);
 }
 
 static int
 bi_load_efi_data(struct preloaded_file *kfp)
 {
 	EFI_MEMORY_DESCRIPTOR *mm;
 	EFI_PHYSICAL_ADDRESS addr;
 	EFI_STATUS status;
 	size_t efisz;
 	UINTN efi_mapkey;
 	UINTN mmsz, pages, retry, sz;
 	UINT32 mmver;
 	struct efi_map_header *efihdr;
 
 #if defined(__amd64__)
 	struct efi_fb efifb;
 
 	if (efi_find_framebuffer(&efifb) == 0) {
 		printf("EFI framebuffer information:\n");
 		printf("addr, size     0x%lx, 0x%lx\n", efifb.fb_addr,
 		    efifb.fb_size);
 		printf("dimensions     %d x %d\n", efifb.fb_width,
 		    efifb.fb_height);
 		printf("stride         %d\n", efifb.fb_stride);
 		printf("masks          0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
 		    efifb.fb_mask_red, efifb.fb_mask_green, efifb.fb_mask_blue,
 		    efifb.fb_mask_reserved);
 
 		file_addmetadata(kfp, MODINFOMD_EFI_FB, sizeof(efifb), &efifb);
 	}
 #endif
 
 	efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf;
 
 	/*
 	 * It is possible that the first call to ExitBootServices may change
 	 * the map key. Fetch a new map key and retry ExitBootServices in that
 	 * case.
 	 */
 	for (retry = 2; retry > 0; retry--) {
 		/*
 		 * Allocate enough pages to hold the bootinfo block and the
 		 * memory map EFI will return to us. The memory map has an
 		 * unknown size, so we have to determine that first. Note that
 		 * the AllocatePages call can itself modify the memory map, so
 		 * we have to take that into account as well. The changes to
 		 * the memory map are caused by splitting a range of free
 		 * memory into two (AFAICT), so that one is marked as being
 		 * loader data.
 		 */
 		sz = 0;
 		BS->GetMemoryMap(&sz, NULL, &efi_mapkey, &mmsz, &mmver);
 		sz += mmsz;
 		sz = (sz + 0xf) & ~0xf;
 		pages = EFI_SIZE_TO_PAGES(sz + efisz);
 		status = BS->AllocatePages(AllocateAnyPages, EfiLoaderData,
 		     pages, &addr);
 		if (EFI_ERROR(status)) {
 			printf("%s: AllocatePages error %lu\n", __func__,
-			    (unsigned long)(status & ~EFI_ERROR_MASK));
+			    EFI_ERROR_CODE(status));
 			return (ENOMEM);
 		}
 
 		/*
 		 * Read the memory map and stash it after bootinfo. Align the
 		 * memory map on a 16-byte boundary (the bootinfo block is page
 		 * aligned).
 		 */
 		efihdr = (struct efi_map_header *)addr;
 		mm = (void *)((uint8_t *)efihdr + efisz);
 		sz = (EFI_PAGE_SIZE * pages) - efisz;
 
 		status = BS->GetMemoryMap(&sz, mm, &efi_mapkey, &mmsz, &mmver);
 		if (EFI_ERROR(status)) {
 			printf("%s: GetMemoryMap error %lu\n", __func__,
-			    (unsigned long)(status & ~EFI_ERROR_MASK));
+			    EFI_ERROR_CODE(status));
 			return (EINVAL);
 		}
 		status = BS->ExitBootServices(IH, efi_mapkey);
 		if (EFI_ERROR(status) == 0) {
 			efihdr->memory_size = sz;
 			efihdr->descriptor_size = mmsz;
 			efihdr->descriptor_version = mmver;
 			file_addmetadata(kfp, MODINFOMD_EFI_MAP, efisz + sz,
 			    efihdr);
 			return (0);
 		}
 		BS->FreePages(addr, pages);
 	}
-	printf("ExitBootServices error %lu\n",
-	    (unsigned long)(status & ~EFI_ERROR_MASK));
+	printf("ExitBootServices error %lu\n", EFI_ERROR_CODE(status));
 	return (EINVAL);
 }
 
 /*
  * Load the information expected by an amd64 kernel.
  *
  * - The 'boothowto' argument is constructed.
  * - The 'bootdev' argument is constructed.
  * - The 'bootinfo' struct is constructed, and copied into the kernel space.
  * - The kernel environment is copied into kernel space.
  * - Module metadata are formatted and placed in kernel space.
  */
 int
 bi_load(char *args, vm_offset_t *modulep, vm_offset_t *kernendp)
 {
 	struct preloaded_file *xp, *kfp;
 	struct devdesc *rootdev;
 	struct file_metadata *md;
 	vm_offset_t addr;
 	uint64_t kernend;
 	uint64_t envp;
 	vm_offset_t size;
 	char *rootdevname;
 	int howto;
 #if defined(LOADER_FDT_SUPPORT)
 	vm_offset_t dtbp;
 	int dtb_size;
 #endif
 #if defined(__arm__)
 	vm_offset_t vaddr;
 	int i;
 	/*
 	 * These metadata addreses must be converted for kernel after
 	 * relocation.
 	 */
 	uint32_t		mdt[] = {
 	    MODINFOMD_SSYM, MODINFOMD_ESYM, MODINFOMD_KERNEND,
 	    MODINFOMD_ENVP,
 #if defined(LOADER_FDT_SUPPORT)
 	    MODINFOMD_DTBP
 #endif
 	};
 #endif
 
 	howto = bi_getboothowto(args);
 
 	/*
 	 * Allow the environment variable 'rootdev' to override the supplied
 	 * device. This should perhaps go to MI code and/or have $rootdev
 	 * tested/set by MI code before launching the kernel.
 	 */
 	rootdevname = getenv("rootdev");
 	archsw.arch_getdev((void**)(&rootdev), rootdevname, NULL);
 	if (rootdev == NULL) {
 		printf("Can't determine root device.\n");
 		return(EINVAL);
 	}
 
 	/* Try reading the /etc/fstab file to select the root device */
 	getrootmount(efi_fmtdev((void *)rootdev));
 
 	addr = 0;
 	for (xp = file_findfile(NULL, NULL); xp != NULL; xp = xp->f_next) {
 		if (addr < (xp->f_addr + xp->f_size))
 			addr = xp->f_addr + xp->f_size;
 	}
 
 	/* Pad to a page boundary. */
 	addr = roundup(addr, PAGE_SIZE);
 
 	/* Copy our environment. */
 	envp = addr;
 	addr = bi_copyenv(addr);
 
 	/* Pad to a page boundary. */
 	addr = roundup(addr, PAGE_SIZE);
 
 #if defined(LOADER_FDT_SUPPORT)
 	/* Handle device tree blob */
 	dtbp = addr;
 	dtb_size = fdt_copy(addr);
 		
 	/* Pad to a page boundary */
 	if (dtb_size)
 		addr += roundup(dtb_size, PAGE_SIZE);
 #endif
 
 	kfp = file_findfile(NULL, "elf kernel");
 	if (kfp == NULL)
 		kfp = file_findfile(NULL, "elf64 kernel");
 	if (kfp == NULL)
 		panic("can't find kernel file");
 	kernend = 0;	/* fill it in later */
 	file_addmetadata(kfp, MODINFOMD_HOWTO, sizeof howto, &howto);
 	file_addmetadata(kfp, MODINFOMD_ENVP, sizeof envp, &envp);
 #if defined(LOADER_FDT_SUPPORT)
 	if (dtb_size)
 		file_addmetadata(kfp, MODINFOMD_DTBP, sizeof dtbp, &dtbp);
 	else
 		pager_output("WARNING! Trying to fire up the kernel, but no "
 		    "device tree blob found!\n");
 #endif
 	file_addmetadata(kfp, MODINFOMD_KERNEND, sizeof kernend, &kernend);
 	file_addmetadata(kfp, MODINFOMD_FW_HANDLE, sizeof ST, &ST);
 
 	bi_load_efi_data(kfp);
 
 	/* Figure out the size and location of the metadata. */
 	*modulep = addr;
 	size = bi_copymodules(0);
 	kernend = roundup(addr + size, PAGE_SIZE);
 	*kernendp = kernend;
 
 	/* patch MODINFOMD_KERNEND */
 	md = file_findmetadata(kfp, MODINFOMD_KERNEND);
 	bcopy(&kernend, md->md_data, sizeof kernend);
 
 #if defined(__arm__)
 	*modulep -= __elfN(relocation_offset);
 
 	/* Do relocation fixup on metadata of each module. */
 	for (xp = file_findfile(NULL, NULL); xp != NULL; xp = xp->f_next) {
 		for (i = 0; i < sizeof mdt / sizeof mdt[0]; i++) {
 			md = file_findmetadata(xp, mdt[i]);
 			if (md) {
 				bcopy(md->md_data, &vaddr, sizeof vaddr);
 				vaddr -= __elfN(relocation_offset);
 				bcopy(&vaddr, md->md_data, sizeof vaddr);
 			}
 		}
 	}
 #endif
 
 	/* Copy module list and metadata. */
 	(void)bi_copymodules(addr);
 
 	return (0);
 }
Index: projects/release-pkg/sys/boot/efi/loader/copy.c
===================================================================
--- projects/release-pkg/sys/boot/efi/loader/copy.c	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/loader/copy.c	(revision 293336)
@@ -1,139 +1,139 @@
 /*-
  * Copyright (c) 2013 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Benno Rice under sponsorship from
  * the FreeBSD Foundation.
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 
 #include <stand.h>
 #include <bootstrap.h>
 
 #include <efi.h>
 #include <efilib.h>
 
 #ifndef EFI_STAGING_SIZE
 #define	EFI_STAGING_SIZE	48
 #endif
 
 #define	STAGE_PAGES	EFI_SIZE_TO_PAGES((EFI_STAGING_SIZE) * 1024 * 1024)
 
 EFI_PHYSICAL_ADDRESS	staging, staging_end;
 int			stage_offset_set = 0;
 ssize_t			stage_offset;
 
 int
 efi_copy_init(void)
 {
 	EFI_STATUS	status;
 
 	status = BS->AllocatePages(AllocateAnyPages, EfiLoaderData,
 	    STAGE_PAGES, &staging);
 	if (EFI_ERROR(status)) {
 		printf("failed to allocate staging area: %lu\n",
-		    (unsigned long)(status & EFI_ERROR_MASK));
+		    EFI_ERROR_CODE(status));
 		return (status);
 	}
 	staging_end = staging + STAGE_PAGES * EFI_PAGE_SIZE;
 
 #if defined(__aarch64__) || defined(__arm__)
 	/*
 	 * Round the kernel load address to a 2MiB value. This is needed
 	 * because the kernel builds a page table based on where it has
 	 * been loaded in physical address space. As the kernel will use
 	 * either a 1MiB or 2MiB page for this we need to make sure it
 	 * is correctly aligned for both cases.
 	 */
 	staging = roundup2(staging, 2 * 1024 * 1024);
 #endif
 
 	return (0);
 }
 
 void *
 efi_translate(vm_offset_t ptr)
 {
 
 	return ((void *)(ptr + stage_offset));
 }
 
 ssize_t
 efi_copyin(const void *src, vm_offset_t dest, const size_t len)
 {
 
 	if (!stage_offset_set) {
 		stage_offset = (vm_offset_t)staging - dest;
 		stage_offset_set = 1;
 	}
 
 	/* XXX: Callers do not check for failure. */
 	if (dest + stage_offset + len > staging_end) {
 		errno = ENOMEM;
 		return (-1);
 	}
 	bcopy(src, (void *)(dest + stage_offset), len);
 	return (len);
 }
 
 ssize_t
 efi_copyout(const vm_offset_t src, void *dest, const size_t len)
 {
 
 	/* XXX: Callers do not check for failure. */
 	if (src + stage_offset + len > staging_end) {
 		errno = ENOMEM;
 		return (-1);
 	}
 	bcopy((void *)(src + stage_offset), dest, len);
 	return (len);
 }
 
 
 ssize_t
 efi_readin(const int fd, vm_offset_t dest, const size_t len)
 {
 
 	if (dest + stage_offset + len > staging_end) {
 		errno = ENOMEM;
 		return (-1);
 	}
 	return (read(fd, (void *)(dest + stage_offset), len));
 }
 
 void
 efi_copy_finish(void)
 {
 	uint64_t	*src, *dst, *last;
 
 	src = (uint64_t *)staging;
 	dst = (uint64_t *)(staging - stage_offset);
 	last = (uint64_t *)staging_end;
 
 	while (src < last)
 		*dst++ = *src++;
 }
Index: projects/release-pkg/sys/boot/efi/loader/devicename.c
===================================================================
--- projects/release-pkg/sys/boot/efi/loader/devicename.c	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/loader/devicename.c	(revision 293336)
@@ -1,169 +1,169 @@
 /*-
  * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
  * Copyright (c) 2006 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <string.h>
 #include <sys/disklabel.h>
 #include "bootstrap.h"
 
 #include <efi.h>
 #include <efilib.h>
 
 static int efi_parsedev(struct devdesc **, const char *, const char **);
 
 /*
  * Point (dev) at an allocated device specifier for the device matching the
  * path in (devspec). If it contains an explicit device specification,
  * use that.  If not, use the default device.
  */
 int
 efi_getdev(void **vdev, const char *devspec, const char **path)
 {
 	struct devdesc **dev = (struct devdesc **)vdev;
 	int rv;
 
 	/*
 	 * If it looks like this is just a path and no device, then
 	 * use the current device instead.
 	 */
 	if (devspec == NULL || *devspec == '/' || !strchr(devspec, ':')) {
 		rv = efi_parsedev(dev, getenv("currdev"), NULL);
 		if (rv == 0 && path != NULL)
 			*path = devspec;
 		return (rv);
 	}
 
 	/* Parse the device name off the beginning of the devspec. */
 	return (efi_parsedev(dev, devspec, path));
 }
 
 /*
  * Point (dev) at an allocated device specifier matching the string version
  * at the beginning of (devspec).  Return a pointer to the remaining
  * text in (path).
  *
  * In all cases, the beginning of (devspec) is compared to the names
  * of known devices in the device switch, and then any following text
  * is parsed according to the rules applied to the device type.
  *
  * For disk-type devices, the syntax is:
  *
  * fs<unit>:
  */
 static int
 efi_parsedev(struct devdesc **dev, const char *devspec, const char **path)
 {
 	struct devdesc *idev;
 	struct devsw *dv;
 	char *cp;
 	const char *np;
 	int i, err;
 
 	/* minimum length check */
 	if (strlen(devspec) < 2)
 		return (EINVAL);
 
 	/* look for a device that matches */
 	for (i = 0; devsw[i] != NULL; i++) {
 		dv = devsw[i];
 		if (!strncmp(devspec, dv->dv_name, strlen(dv->dv_name)))
 			break;
 	}
 	if (devsw[i] == NULL)
 		return (ENOENT);
 
 	idev = malloc(sizeof(struct devdesc));
 	if (idev == NULL)
 		return (ENOMEM);
 
 	idev->d_dev = dv;
 	idev->d_type = dv->dv_type;
 	idev->d_unit = -1;
 
 	err = 0;
 	np = devspec + strlen(dv->dv_name);
 	if (*np != '\0' && *np != ':') {
 		idev->d_unit = strtol(np, &cp, 0);
 		if (cp == np) {
 			idev->d_unit = -1;
 			free(idev);
 			return (EUNIT);
 		}
 	}
 	if (*cp != '\0' && *cp != ':') {
 		free(idev);
 		return (EINVAL);
 	}
 
 	if (path != NULL)
 		*path = (*cp == 0) ? cp : cp + 1;
 	if (dev != NULL)
 		*dev = idev;
 	else
 		free(idev);
 	return (0);
 }
 
 char *
 efi_fmtdev(void *vdev)
 {
 	struct devdesc *dev = (struct devdesc *)vdev;
 	static char buf[32];	/* XXX device length constant? */
 
 	switch(dev->d_type) {
 	case DEVT_NONE:
 		strcpy(buf, "(no device)");
 		break;
 
 	default:
 		sprintf(buf, "%s%d:", dev->d_dev->dv_name, dev->d_unit);
 		break;
 	}
 
-	return(buf);
+	return (buf);
 }
 
 /*
  * Set currdev to suit the value being supplied in (value)
  */
 int
 efi_setcurrdev(struct env_var *ev, int flags, const void *value)
 {
 	struct devdesc *ncurr;
 	int rv;
 
 	rv = efi_parsedev(&ncurr, value, NULL);
 	if (rv != 0)
-		return(rv);
+		return (rv);
 
 	free(ncurr);
 	env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL);
 	return (0);
 }
Index: projects/release-pkg/sys/boot/efi/loader/main.c
===================================================================
--- projects/release-pkg/sys/boot/efi/loader/main.c	(revision 293335)
+++ projects/release-pkg/sys/boot/efi/loader/main.c	(revision 293336)
@@ -1,446 +1,441 @@
 /*-
  * Copyright (c) 2008-2010 Rui Paulo
  * Copyright (c) 2006 Marcel Moolenaar
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <stand.h>
 #include <string.h>
 #include <setjmp.h>
 
 #include <efi.h>
 #include <efilib.h>
 
 #include <bootstrap.h>
 #include <smbios.h>
 
 #include "loader_efi.h"
 
 extern char bootprog_name[];
 extern char bootprog_rev[];
 extern char bootprog_date[];
 extern char bootprog_maker[];
 
 struct devdesc currdev;		/* our current device */
 struct arch_switch archsw;	/* MI/MD interface boundary */
 
 EFI_GUID acpi = ACPI_TABLE_GUID;
 EFI_GUID acpi20 = ACPI_20_TABLE_GUID;
 EFI_GUID devid = DEVICE_PATH_PROTOCOL;
 EFI_GUID imgid = LOADED_IMAGE_PROTOCOL;
 EFI_GUID mps = MPS_TABLE_GUID;
 EFI_GUID netid = EFI_SIMPLE_NETWORK_PROTOCOL;
 EFI_GUID smbios = SMBIOS_TABLE_GUID;
 EFI_GUID dxe = DXE_SERVICES_TABLE_GUID;
 EFI_GUID hoblist = HOB_LIST_TABLE_GUID;
 EFI_GUID memtype = MEMORY_TYPE_INFORMATION_TABLE_GUID;
 EFI_GUID debugimg = DEBUG_IMAGE_INFO_TABLE_GUID;
 EFI_GUID fdtdtb = FDT_TABLE_GUID;
 
 EFI_STATUS
 main(int argc, CHAR16 *argv[])
 {
 	char var[128];
 	EFI_LOADED_IMAGE *img;
 	EFI_GUID *guid;
 	int i, j, vargood;
 
 	/*
 	 * XXX Chicken-and-egg problem; we want to have console output
 	 * early, but some console attributes may depend on reading from
 	 * eg. the boot device, which we can't do yet.  We can use
 	 * printf() etc. once this is done.
 	 */
 	cons_probe();
 
 	/*
 	 * Loop through the args, and for each one that contains an '=' that is
 	 * not the first character, add it to the environment.  This allows
 	 * loader and kernel env vars to be passed on the command line.  Convert
 	 * args from UCS-2 to ASCII (16 to 8 bit) as they are copied.
 	 */
 	for (i = 1; i < argc; i++) {
 		vargood = 0;
 		for (j = 0; argv[i][j] != 0; j++) {
 			if (j == sizeof(var)) {
 				vargood = 0;
 				break;
 			}
 			if (j > 0 && argv[i][j] == '=')
 				vargood = 1;
 			var[j] = (char)argv[i][j];
 		}
 		if (vargood) {
 			var[j] = 0;
 			putenv(var);
 		}
 	}
 
 	if (efi_copy_init()) {
 		printf("failed to allocate staging area\n");
 		return (EFI_BUFFER_TOO_SMALL);
 	}
 
 	/*
 	 * March through the device switch probing for things.
 	 */
 	for (i = 0; devsw[i] != NULL; i++)
 		if (devsw[i]->dv_init != NULL)
 			(devsw[i]->dv_init)();
 
 	/* Get our loaded image protocol interface structure. */
 	BS->HandleProtocol(IH, &imgid, (VOID**)&img);
 
 	printf("Image base: 0x%lx\n", (u_long)img->ImageBase);
 	printf("EFI version: %d.%02d\n", ST->Hdr.Revision >> 16,
 	    ST->Hdr.Revision & 0xffff);
 	printf("EFI Firmware: ");
 	/* printf doesn't understand EFI Unicode */
 	ST->ConOut->OutputString(ST->ConOut, ST->FirmwareVendor);
 	printf(" (rev %d.%02d)\n", ST->FirmwareRevision >> 16,
 	    ST->FirmwareRevision & 0xffff);
 
 	printf("\n");
 	printf("%s, Revision %s\n", bootprog_name, bootprog_rev);
 	printf("(%s, %s)\n", bootprog_maker, bootprog_date);
 
 	efi_handle_lookup(img->DeviceHandle, &currdev.d_dev, &currdev.d_unit);
 	currdev.d_type = currdev.d_dev->dv_type;
 
 	/*
 	 * Disable the watchdog timer. By default the boot manager sets
 	 * the timer to 5 minutes before invoking a boot option. If we
 	 * want to return to the boot manager, we have to disable the
 	 * watchdog timer and since we're an interactive program, we don't
 	 * want to wait until the user types "quit". The timer may have
 	 * fired by then. We don't care if this fails. It does not prevent
 	 * normal functioning in any way...
 	 */
 	BS->SetWatchdogTimer(0, 0, 0, NULL);
 
 	env_setenv("currdev", EV_VOLATILE, efi_fmtdev(&currdev),
 	    efi_setcurrdev, env_nounset);
 	env_setenv("loaddev", EV_VOLATILE, efi_fmtdev(&currdev), env_noset,
 	    env_nounset);
 
 	setenv("LINES", "24", 1);	/* optional */
 
 	archsw.arch_autoload = efi_autoload;
 	archsw.arch_getdev = efi_getdev;
 	archsw.arch_copyin = efi_copyin;
 	archsw.arch_copyout = efi_copyout;
 	archsw.arch_readin = efi_readin;
 
 	for (i = 0; i < ST->NumberOfTableEntries; i++) {
 		guid = &ST->ConfigurationTable[i].VendorGuid;
 		if (!memcmp(guid, &smbios, sizeof(EFI_GUID))) {
 			smbios_detect(ST->ConfigurationTable[i].VendorTable);
 			break;
 		}
 	}
 
 	interact(NULL);			/* doesn't return */
 
 	return (EFI_SUCCESS);		/* keep compiler happy */
 }
 
 COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot);
 
 static int
 command_reboot(int argc, char *argv[])
 {
 	int i;
 
 	for (i = 0; devsw[i] != NULL; ++i)
 		if (devsw[i]->dv_cleanup != NULL)
 			(devsw[i]->dv_cleanup)();
 
 	RS->ResetSystem(EfiResetCold, EFI_SUCCESS, 23,
 	    (CHAR16 *)"Reboot from the loader");
 
 	/* NOTREACHED */
 	return (CMD_ERROR);
 }
 
 COMMAND_SET(quit, "quit", "exit the loader", command_quit);
 
 static int
 command_quit(int argc, char *argv[])
 {
 	exit(0);
 	return (CMD_OK);
 }
 
 COMMAND_SET(memmap, "memmap", "print memory map", command_memmap);
 
 static int
 command_memmap(int argc, char *argv[])
 {
 	UINTN sz;
 	EFI_MEMORY_DESCRIPTOR *map, *p;
 	UINTN key, dsz;
 	UINT32 dver;
 	EFI_STATUS status;
 	int i, ndesc;
 	static char *types[] = {
 	    "Reserved",
 	    "LoaderCode",
 	    "LoaderData",
 	    "BootServicesCode",
 	    "BootServicesData",
 	    "RuntimeServicesCode",
 	    "RuntimeServicesData",
 	    "ConventionalMemory",
 	    "UnusableMemory",
 	    "ACPIReclaimMemory",
 	    "ACPIMemoryNVS",
 	    "MemoryMappedIO",
 	    "MemoryMappedIOPortSpace",
 	    "PalCode"
 	};
 
 	sz = 0;
 	status = BS->GetMemoryMap(&sz, 0, &key, &dsz, &dver);
 	if (status != EFI_BUFFER_TOO_SMALL) {
 		printf("Can't determine memory map size\n");
-		return CMD_ERROR;
+		return (CMD_ERROR);
 	}
 	map = malloc(sz);
 	status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
 	if (EFI_ERROR(status)) {
 		printf("Can't read memory map\n");
-		return CMD_ERROR;
+		return (CMD_ERROR);
 	}
 
 	ndesc = sz / dsz;
 	printf("%23s %12s %12s %8s %4s\n",
-	       "Type", "Physical", "Virtual", "#Pages", "Attr");
+	    "Type", "Physical", "Virtual", "#Pages", "Attr");
 
 	for (i = 0, p = map; i < ndesc;
 	     i++, p = NextMemoryDescriptor(p, dsz)) {
-	    printf("%23s %012lx %012lx %08lx ",
-		   types[p->Type],
-		   p->PhysicalStart,
-		   p->VirtualStart,
-		   p->NumberOfPages);
-	    if (p->Attribute & EFI_MEMORY_UC)
-		printf("UC ");
-	    if (p->Attribute & EFI_MEMORY_WC)
-		printf("WC ");
-	    if (p->Attribute & EFI_MEMORY_WT)
-		printf("WT ");
-	    if (p->Attribute & EFI_MEMORY_WB)
-		printf("WB ");
-	    if (p->Attribute & EFI_MEMORY_UCE)
-		printf("UCE ");
-	    if (p->Attribute & EFI_MEMORY_WP)
-		printf("WP ");
-	    if (p->Attribute & EFI_MEMORY_RP)
-		printf("RP ");
-	    if (p->Attribute & EFI_MEMORY_XP)
-		printf("XP ");
-	    printf("\n");
+		printf("%23s %012lx %012lx %08lx ", types[p->Type],
+		   p->PhysicalStart, p->VirtualStart, p->NumberOfPages);
+		if (p->Attribute & EFI_MEMORY_UC)
+			printf("UC ");
+		if (p->Attribute & EFI_MEMORY_WC)
+			printf("WC ");
+		if (p->Attribute & EFI_MEMORY_WT)
+			printf("WT ");
+		if (p->Attribute & EFI_MEMORY_WB)
+			printf("WB ");
+		if (p->Attribute & EFI_MEMORY_UCE)
+			printf("UCE ");
+		if (p->Attribute & EFI_MEMORY_WP)
+			printf("WP ");
+		if (p->Attribute & EFI_MEMORY_RP)
+			printf("RP ");
+		if (p->Attribute & EFI_MEMORY_XP)
+			printf("XP ");
+		printf("\n");
 	}
 
-	return CMD_OK;
+	return (CMD_OK);
 }
 
-COMMAND_SET(configuration, "configuration",
-	    "print configuration tables", command_configuration);
+COMMAND_SET(configuration, "configuration", "print configuration tables",
+    command_configuration);
 
 static const char *
 guid_to_string(EFI_GUID *guid)
 {
 	static char buf[40];
 
 	sprintf(buf, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
 	    guid->Data1, guid->Data2, guid->Data3, guid->Data4[0],
 	    guid->Data4[1], guid->Data4[2], guid->Data4[3], guid->Data4[4],
 	    guid->Data4[5], guid->Data4[6], guid->Data4[7]);
 	return (buf);
 }
 
 static int
 command_configuration(int argc, char *argv[])
 {
 	int i;
 
 	printf("NumberOfTableEntries=%ld\n", ST->NumberOfTableEntries);
 	for (i = 0; i < ST->NumberOfTableEntries; i++) {
 		EFI_GUID *guid;
 
 		printf("  ");
 		guid = &ST->ConfigurationTable[i].VendorGuid;
 		if (!memcmp(guid, &mps, sizeof(EFI_GUID)))
 			printf("MPS Table");
 		else if (!memcmp(guid, &acpi, sizeof(EFI_GUID)))
 			printf("ACPI Table");
 		else if (!memcmp(guid, &acpi20, sizeof(EFI_GUID)))
 			printf("ACPI 2.0 Table");
 		else if (!memcmp(guid, &smbios, sizeof(EFI_GUID)))
 			printf("SMBIOS Table");
 		else if (!memcmp(guid, &dxe, sizeof(EFI_GUID)))
 			printf("DXE Table");
 		else if (!memcmp(guid, &hoblist, sizeof(EFI_GUID)))
 			printf("HOB List Table");
 		else if (!memcmp(guid, &memtype, sizeof(EFI_GUID)))
 			printf("Memory Type Information Table");
 		else if (!memcmp(guid, &debugimg, sizeof(EFI_GUID)))
 			printf("Debug Image Info Table");
 		else if (!memcmp(guid, &fdtdtb, sizeof(EFI_GUID)))
 			printf("FDT Table");
 		else
 			printf("Unknown Table (%s)", guid_to_string(guid));
 		printf(" at %p\n", ST->ConfigurationTable[i].VendorTable);
 	}
 
-	return CMD_OK;
+	return (CMD_OK);
 }
 
 
 COMMAND_SET(mode, "mode", "change or display EFI text modes", command_mode);
 
 static int
 command_mode(int argc, char *argv[])
 {
 	UINTN cols, rows;
 	unsigned int mode;
 	int i;
 	char *cp;
 	char rowenv[8];
 	EFI_STATUS status;
 	SIMPLE_TEXT_OUTPUT_INTERFACE *conout;
+	extern void HO(void);
 
 	conout = ST->ConOut;
 
 	if (argc > 1) {
 		mode = strtol(argv[1], &cp, 0);
 		if (cp[0] != '\0') {
 			printf("Invalid mode\n");
 			return (CMD_ERROR);
 		}
 		status = conout->QueryMode(conout, mode, &cols, &rows);
 		if (EFI_ERROR(status)) {
 			printf("invalid mode %d\n", mode);
 			return (CMD_ERROR);
 		}
 		status = conout->SetMode(conout, mode);
 		if (EFI_ERROR(status)) {
 			printf("couldn't set mode %d\n", mode);
 			return (CMD_ERROR);
 		}
 		sprintf(rowenv, "%u", (unsigned)rows);
 		setenv("LINES", rowenv, 1);
-
+		HO();		/* set cursor */
 		return (CMD_OK);
 	}
 
 	printf("Current mode: %d\n", conout->Mode->Mode);
 	for (i = 0; i <= conout->Mode->MaxMode; i++) {
 		status = conout->QueryMode(conout, i, &cols, &rows);
 		if (EFI_ERROR(status))
 			continue;
 		printf("Mode %d: %u columns, %u rows\n", i, (unsigned)cols,
 		    (unsigned)rows);
 	}
 
 	if (i != 0)
 		printf("Select a mode with the command \"mode <number>\"\n");
 
 	return (CMD_OK);
 }
 
 
 COMMAND_SET(nvram, "nvram", "get or set NVRAM variables", command_nvram);
 
 static int
 command_nvram(int argc, char *argv[])
 {
 	CHAR16 var[128];
 	CHAR16 *data;
 	EFI_STATUS status;
 	EFI_GUID varguid = { 0,0,0,{0,0,0,0,0,0,0,0} };
 	UINTN varsz, datasz;
 	SIMPLE_TEXT_OUTPUT_INTERFACE *conout;
 	int i;
 
 	conout = ST->ConOut;
 
 	/* Initiate the search */
 	status = RS->GetNextVariableName(&varsz, NULL, NULL);
 
 	for (; status != EFI_NOT_FOUND; ) {
-		status = RS->GetNextVariableName(&varsz, var,
-		    &varguid);
+		status = RS->GetNextVariableName(&varsz, var, &varguid);
 		//if (EFI_ERROR(status))
 			//break;
 
 		conout->OutputString(conout, var);
 		printf("=");
 		datasz = 0;
-		status = RS->GetVariable(var, &varguid, NULL, &datasz,
-		    NULL);
+		status = RS->GetVariable(var, &varguid, NULL, &datasz, NULL);
 		/* XXX: check status */
 		data = malloc(datasz);
-		status = RS->GetVariable(var, &varguid, NULL, &datasz,
-		    data);
+		status = RS->GetVariable(var, &varguid, NULL, &datasz, data);
 		if (EFI_ERROR(status))
 			printf("<error retrieving variable>");
 		else {
 			for (i = 0; i < datasz; i++) {
 				if (isalnum(data[i]) || isspace(data[i]))
 					printf("%c", data[i]);
 				else
 					printf("\\x%02x", data[i]);
 			}
 		}
 		/* XXX */
 		pager_output("\n");
 		free(data);
 	}
 
 	return (CMD_OK);
 }
 
 #ifdef LOADER_FDT_SUPPORT
 extern int command_fdt_internal(int argc, char *argv[]);
 
 /*
  * Since proper fdt command handling function is defined in fdt_loader_cmd.c,
  * and declaring it as extern is in contradiction with COMMAND_SET() macro
  * (which uses static pointer), we're defining wrapper function, which
  * calls the proper fdt handling routine.
  */
 static int
 command_fdt(int argc, char *argv[])
 {
 
 	return (command_fdt_internal(argc, argv));
 }
 
 COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt);
 #endif
Index: projects/release-pkg/sys/boot/ficl/amd64/sysdep.c
===================================================================
--- projects/release-pkg/sys/boot/ficl/amd64/sysdep.c	(revision 293335)
+++ projects/release-pkg/sys/boot/ficl/amd64/sysdep.c	(revision 293336)
@@ -1,99 +1,99 @@
 /*******************************************************************
 ** s y s d e p . c
 ** Forth Inspired Command Language
 ** Author: John Sadler (john_sadler@alum.mit.edu)
 ** Created: 16 Oct 1997
 ** Implementations of FICL external interface functions... 
 **
 *******************************************************************/
 
 /* $FreeBSD$ */
 
 #ifdef TESTMAIN
 #include <stdio.h>
 #include <stdlib.h>
 #else
 #include <stand.h>
 #endif
 #include "ficl.h"
 
 /*
 *******************  FreeBSD  P O R T   B E G I N S   H E R E ******************** Michael Smith
 */
 
 #if PORTABLE_LONGMULDIV == 0
 DPUNS ficlLongMul(FICL_UNS x, FICL_UNS y)
 {
     DPUNS q;
     u_int64_t qx;
 
     qx = (u_int64_t)x * (u_int64_t) y;
 
     q.hi = (u_int32_t)( qx >> 32 );
     q.lo = (u_int32_t)( qx & 0xFFFFFFFFL);
 
     return q;
 }
 
 UNSQR ficlLongDiv(DPUNS q, FICL_UNS y)
 {
     UNSQR result;
     u_int64_t qx, qh;
 
     qh = q.hi;
     qx = (qh << 32) | q.lo;
 
     result.quot = qx / y;
     result.rem  = qx % y;
 
     return result;
 }
 #endif
 
 void  ficlTextOut(FICL_VM *pVM, char *msg, int fNewline)
 {
     IGNORE(pVM);
 
     while(*msg != 0)
-	putchar(*(msg++));
+	putchar((unsigned char)*(msg++));
     if (fNewline)
 	putchar('\n');
 
    return;
 }
 
 void *ficlMalloc (size_t size)
 {
     return malloc(size);
 }
 
 void *ficlRealloc (void *p, size_t size)
 {
     return realloc(p, size);
 }
 
 void  ficlFree   (void *p)
 {
     free(p);
 }
 
 
 /*
 ** Stub function for dictionary access control - does nothing
 ** by default, user can redefine to guarantee exclusive dict
 ** access to a single thread for updates. All dict update code
 ** is guaranteed to be bracketed as follows:
 ** ficlLockDictionary(TRUE);
 ** <code that updates dictionary>
 ** ficlLockDictionary(FALSE);
 **
 ** Returns zero if successful, nonzero if unable to acquire lock
 ** befor timeout (optional - could also block forever)
 */
 #if FICL_MULTITHREAD
 int ficlLockDictionary(short fLock)
 {
 	IGNORE(fLock);
 	return 0;
 }
 #endif /* FICL_MULTITHREAD */
Index: projects/release-pkg/sys/boot/forth/beastie.4th
===================================================================
--- projects/release-pkg/sys/boot/forth/beastie.4th	(revision 293335)
+++ projects/release-pkg/sys/boot/forth/beastie.4th	(revision 293336)
@@ -1,115 +1,110 @@
 \ Copyright (c) 2003 Scott Long <scottl@FreeBSD.org>
 \ Copyright (c) 2003 Aleksander Fafula <alex@fafula.com>
 \ Copyright (c) 2006-2015 Devin Teske <dteske@FreeBSD.org>
 \ All rights reserved.
 \ 
 \ Redistribution and use in source and binary forms, with or without
 \ modification, are permitted provided that the following conditions
 \ are met:
 \ 1. Redistributions of source code must retain the above copyright
 \    notice, this list of conditions and the following disclaimer.
 \ 2. Redistributions in binary form must reproduce the above copyright
 \    notice, this list of conditions and the following disclaimer in the
 \    documentation and/or other materials provided with the distribution.
 \ 
 \ THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 \ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 \ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 \ ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 \ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 \ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 \ OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 \ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 \ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 \ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 \ SUCH DAMAGE.
 \ 
 \ $FreeBSD$
 
 marker task-beastie.4th
 
 only forth definitions
 
 variable logoX
 variable logoY
 
 \ Initialize logo placement to defaults
 46 logoX !
 4  logoY !
 
 \ This function draws any number of beastie logos at (loader_logo_x,
 \ loader_logo_y) if defined, else (46,4) (to the right of the menu). To choose
 \ your beastie, set the variable `loader_logo' to the respective logo name.
 \ 
 \ NOTE: Each is defined as a logo function in /boot/logo-${loader_logo}.4th
 \ NOTE: If `/boot/logo-${loader_logo}.4th' does not exist or does not define
 \       a `logo' function, no beastie is drawn.
 \ 
 : draw-beastie ( -- ) \ at (loader_logo_x,loader_logo_y), else (46,4)
 
 	s" loader_logo_x" getenv dup -1 <> if
 		?number 1 = if logoX ! then
 	else drop then
 	s" loader_logo_y" getenv dup -1 <> if
 		?number 1 = if logoY ! then
 	else drop then
 
 
 	\ If `logo' is defined, execute it
 	s" logo" sfind ( -- xt|0 bool ) if
 		logoX @ logoY @ rot execute
 	else
 		\ Not defined; try-include desired logo file
 		drop ( xt = 0 ) \ cruft
 		s" loader_logo" getenv dup -1 = over 0= or if
 			dup 0= if 2drop else drop then \ getenv result unused
 			loader_color? if
 				s" try-include /boot/logo-orb.4th"
 			else
 				s" try-include /boot/logo-orbbw.4th"
 			then
 		else
 			2drop ( c-addr/u -- ) \ getenv result unused
 			s" try-include /boot/logo-${loader_logo}.4th"
 		then
 		evaluate
 		1 spaces
 
 		\ Execute `logo' if defined now
 		s" logo" sfind if
 			logoX @ logoY @ rot execute
 		else drop then
 	then
 ;
 
 also support-functions
 
 : beastie-start ( -- ) \ starts the menu
-	s" console" getenv dup -1 <> if
-		s" efi" 2swap contains? if
-			s" set beastie_disable=YES" evaluate
-		then
-	else drop then
 	s" beastie_disable" getenv dup -1 <> if
 		s" YES" compare-insensitive 0= if
 			any_conf_read? if
 				load_xen_throw
 				load_kernel
 				load_modules
 			then
 			exit \ to autoboot (default)
 		then
 	else drop then
 
 	s" loader_delay" getenv -1 = if
 		s" include /boot/menu.rc" evaluate
 	else
 		drop
 		." Loading Menu (Ctrl-C to Abort)" cr
 		s" set delay_command='include /boot/menu.rc'" evaluate
 		s" set delay_showdots" evaluate
 		delay_execute
 	then
 ;
 
 only forth definitions
Index: projects/release-pkg/sys/boot/forth/beastie.4th.8
===================================================================
--- projects/release-pkg/sys/boot/forth/beastie.4th.8	(revision 293335)
+++ projects/release-pkg/sys/boot/forth/beastie.4th.8	(revision 293336)
@@ -1,174 +1,173 @@
 .\" Copyright (c) 2011-2012 Devin Teske
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 27, 2014
+.Dd January 6, 2016
 .Dt BEASTIE.4TH 8
 .Os
 .Sh NAME
 .Nm beastie.4th
 .Nd FreeBSD ASCII art boot module
 .Sh DESCRIPTION
 The file that goes by the name of
 .Nm
 is a set of commands designed to draw the ASCII art FreeBSD mascot
 .Nd known simply as
 .Ic beastie
 .Nd to the right of the boot loader menu.
 The commands of
 .Nm
 by themselves are not enough for most uses.
 Please refer to the
 examples below for the most common situations, and to
 .Xr loader 8
 for additional commands.
 .Pp
 Before using any of the commands provided in
 .Nm ,
 it must be included
 through the command:
 .Pp
 .Dl include beastie.4th
 .Pp
 This line is present in the default
 .Pa /boot/loader.rc
 file, so it is not needed (and should not be re-issued) in a normal setup.
 .Pp
 The commands provided by it are:
 .Pp
 .Bl -tag -width disable-module_module -compact -offset indent
 .It Ic draw-beastie
 Draws the FreeBSD logo.
 .Pp
 The logo that is drawn is configured by setting the
 .Ic loader_logo
 variable in
 .Xr loader.conf 5
 to one of
 .Dq Li beastie ,
 .Dq Li beastiebw ,
 .Dq Li fbsdbw ,
 .Dq Li orb ,
 and
 .Dq Li orbbw
 (the default).
 .Pp
 The position of the logo can be configured by setting the
 .Ic loader_logo_x
 and
 .Ic loader_logo_y
 variables in
 .Xr loader.conf 5 .
 The default values are 46 (x) and 4 (y).
 .Pp
 .It Ic clear-beastie
 Clears the screen of beastie.
 .Pp
 .It Ic beastie-start
 Initializes the interactive boot loader menu.
 .Pp
 The
 .Ic loader_delay
 variable can be configured in
 .Xr loader.conf 5
 to the number of seconds you would like to delay loading the boot menu.
 During the delay the user can press Ctrl-C to fall back to
 .Ic autoboot
 or ENTER to proceed.
 The default behavior is to not delay.
 .El
 .Pp
 The environment variables that effect its behavior are:
 .Bl -tag -width bootfile -offset indent
 .It Va loader_logo
 Selects the desired logo in the beastie boot menu. Possible values are:
 .Dq Li fbsdbw ,
 .Dq Li beastie ,
 .Dq Li beastiebw ,
 .Dq Li orb ,
 .Dq Li orbbw
 (default), and
 .Dq Li none .
 .It Va loader_logo_x
 Sets the desired column position of the logo. Default is 46.
 .It Va loader_logo_y
 Sets the desired row position of the logo. Default is 4.
 .It Va beastie_disable
 If set to
 .Dq YES ,
 the beastie boot menu will be skipped.
-The beastie boot menu is always skipped if booting UEFI or running non-x86
-hardware.
+The beastie boot menu is always skipped when running on non-x86 hardware.
 .It Va loader_delay
 If set to a number higher than zero, introduces a delay before starting the
 beastie boot menu. During the delay the user can press either Ctrl-C to skip
 the menu or ENTER to proceed to the menu. The default is to not delay when
 loading the menu.
 .El
 .Sh FILES
 .Bl -tag -width /boot/loader.4th -compact
 .It Pa /boot/loader
 The
 .Xr loader 8 .
 .It Pa /boot/beastie.4th
 .Nm
 itself.
 .It Pa /boot/loader.rc
 .Xr loader 8
 bootstrapping script.
 .El
 .Sh EXAMPLES
 Standard i386
 .Pa /boot/loader.rc :
 .Pp
 .Bd -literal -offset indent -compact
 include /boot/beastie.4th
 beastie-start
 .Ed
 .Pp
 Set a different logo in
 .Xr loader.conf 5 :
 .Pp
 .Bd -literal -offset indent -compact
 loader_logo="beastie"
 .Ed
 .Sh SEE ALSO
 .Xr loader.conf 5 ,
 .Xr loader 8 ,
 .Xr loader.4th 8
 .Sh HISTORY
 The
 .Nm
 set of commands first appeared in
 .Fx 5.1 .
 .Sh AUTHORS
 The
 .Nm
 set of commands was written by
 .An -nosplit
 .An Scott Long Aq scottl@FreeBSD.org ,
 .An Aleksander Fafula Aq alex@fafula.com
 and
 .An Devin Teske Aq dteske@FreeBSD.org .
Index: projects/release-pkg/sys/boot/forth/loader.conf.5
===================================================================
--- projects/release-pkg/sys/boot/forth/loader.conf.5	(revision 293335)
+++ projects/release-pkg/sys/boot/forth/loader.conf.5	(revision 293336)
@@ -1,306 +1,305 @@
 .\" Copyright (c) 1999 Daniel C. Sobral
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
-.Dd April 27, 2014
+.Dd January 6, 2016
 .Dt LOADER.CONF 5
 .Os
 .Sh NAME
 .Nm loader.conf
 .Nd "system bootstrap configuration information"
 .Sh DESCRIPTION
 The file
 .Nm
 contains descriptive information on bootstrapping the system.
 Through
 it you can specify the kernel to be booted, parameters to be passed to
 it, and additional modules to be loaded; and generally set all variables
 described in
 .Xr loader 8 .
 .Pp
 The file
 .Pa /boot/loader.rc
 must contain the following two lines for
 .Nm
 to be automatically processed:
 .Pp
 .Dl include /boot/loader.4th
 .Dl start
 .Pp
 If no
 .Pa /boot/loader.rc
 exists at installworld time, one with the above lines will be installed.
 .Sh SYNTAX
 Though
 .Nm Ns 's
 format was defined explicitly to resemble
 .Xr rc.conf 5 ,
 and can be sourced by
 .Xr sh 1 ,
 some settings are treated in a special fashion.
 Also, the
 behavior of some settings is defined by the setting's suffix;
 the prefix identifies which module the setting controls.
 .Pp
 The general parsing rules are:
 .Bl -bullet
 .It
 Spaces and empty lines are ignored.
 .It
 A # sign will mark the remainder of the line as a comment.
 .It
 Only one setting can be present on each line.
 .El
 .Pp
 All settings have the following format:
 .Pp
 .Dl variable="value"
 .Pp
 Unless it belongs to one of the classes of settings that receive special
 treatment, a setting will set the value of a
 .Xr loader 8
 environment variable.
 The settings that receive special
 treatment are listed below.
 Settings beginning with
 .Qq *
 below define the modules to be loaded and
 may have any prefix; the prefix identifies a module.
 All such settings sharing a common
 prefix refer to the same module.
 .Bl -tag -width Ar
 .It Ar exec
 Immediately executes a
 .Xr loader 8
 command.
 This type of setting cannot be processed by programs other
 than
 .Xr loader 8 ,
 so its use should be avoided.
 Multiple instances of it will be processed
 independently.
 .It Ar loader_conf_files
 Defines additional configuration files to be processed right after the
 present file.
 .It Ar kernel
 Name of the kernel to be loaded.
 If no kernel name is set, no additional
 modules will be loaded.
 The name must be a subdirectory of
 .Pa /boot
 that contains a kernel.
 .It Ar kernel_options
 Flags to be passed to the kernel.
 .It Ar password
 Protect boot menu with a password without interrupting
 .Ic autoboot
 process.
 The password should be in clear text format.
 If a password is set, boot menu will not appear until any key is pressed during
 countdown period specified by
 .Va autoboot_delay
 variable or
 .Ic autoboot
 process fails.
 In both cases user should provide specified password to be able to access boot
 menu.
 .It Ar bootlock_password
 Provides a password to be required by check-password before execution is
 allowed to continue.
 The password should be in clear text format.
 If a password is set, the user must provide specified password to boot.
 .It Ar verbose_loading
 If set to
 .Dq YES ,
 module names will be displayed as they are loaded.
 .It Ar *_load
 If set to
 .Dq YES ,
 that module will be loaded.
 If no name is defined (see below), the
 module's name is taken to be the same as the prefix.
 .It Ar *_name
 Defines the name of the module.
 .It Ar *_type
 Defines the module's type.
 If none is given, it defaults to a kld module.
 .It Ar *_flags
 Flags and parameters to be passed to the module.
 .It Ar *_before
 Commands to be executed before the module is loaded.
 Use of this setting
 should be avoided.
 .It Ar *_after
 Commands to be executed after the module is loaded.
 Use of this setting
 should be avoided.
 .It Ar *_error
 Commands to be executed if the loading of a module fails.
 Except for the
 special value
 .Dq abort ,
 which aborts the bootstrap process, use of this setting should be avoided.
 .El
 .Pp
 .Em WARNING:
 developers should never use these suffixes for any kernel environment
 variables (tunables) or conflicts will result.
 .Sh DEFAULT SETTINGS
 Most of
 .Nm Ns 's
 default settings can be ignored.
 The few of them which are important
 or useful are:
 .Bl -tag -width bootfile -offset indent
 .It Va bitmap_load
 .Pq Dq NO
 If set to
 .Dq YES ,
 a bitmap will be loaded to be displayed on screen while booting.
 .It Va bitmap_name
 .Pq Dq Pa /boot/splash.bmp
 Name of the bitmap to be loaded.
 Any other name can be used.
 .It Va comconsole_speed
 .Dq ( 9600
 or the value of the
 .Va BOOT_COMCONSOLE_SPEED
 variable when
 .Xr loader 8
 was compiled).
 Sets the speed of the serial console.
 If the previous boot loader stage specified that a serial console
 is in use then the default speed is determined from the current
 serial port speed setting.
 .It Va console
 .Pq Dq vidconsole
 .Dq comconsole
 selects serial console,
 .Dq vidconsole
 selects the video console,
 .Dq nullconsole
 selects a mute console
 (useful for systems with neither a video console nor a serial port), and
 .Dq spinconsole
 selects the video console which prevents any input and hides all output
 replacing it with
 .Dq spinning
 character (useful for embedded products and such).
 .It Va kernel
 .Pq Dq kernel
 .It Va kernels
 .Pq Dq kernel kernel.old
 Space or comma separated list of kernels to present in the boot menu.
 .It Va loader_conf_files
 .Pq Dq Pa /boot/loader.conf /boot/loader.conf.local
 .It Va splash_bmp_load
 .Pq Dq NO
 If set to
 .Dq YES ,
 will load the splash screen module, making it possible to display a bmp image
 on the screen while booting.
 .It Va splash_pcx_load
 .Pq Dq NO
 If set to
 .Dq YES ,
 will load the splash screen module, making it possible to display a pcx image
 on the screen while booting.
 .It Va vesa_load
 .Pq Dq NO
 If set to
 .Dq YES ,
 the vesa module will be loaded, enabling bitmaps above VGA resolution to
 be displayed.
 .It Va beastie_disable
 If set to
 .Dq YES ,
 the beastie boot menu will be skipped.
-The beastie boot menu is always skipped if booting UEFI or running non-x86
-hardware.
+The beastie boot menu is always skipped when running on non-x86 hardware.
 .It Va loader_logo Pq Dq Li orbbw
 Selects a desired logo in the beastie boot menu.
 Possible values are:
 .Dq Li orbbw ,
 .Dq Li orb ,
 .Dq Li fbsdbw ,
 .Dq Li beastiebw ,
 .Dq Li beastie ,
 and
 .Dq Li none .
 .It Va loader_color
 If set to
 .Dq NO ,
 the beastie boot menu will be displayed without ANSI coloring.
 .It Va entropy_cache_load
 .Pq Dq YES
 If set to
 .Dq NO ,
 the very early
 boot-time entropy file
 will not be loaded.
 See the entropy entries in
 .Xr rc.conf 5 .
 .It Va entropy_cache_name
 .Pq Dq /boot/entropy
 The name of the very early
 boot-time entropy cache file.
 .El
 .Sh FILES
 .Bl -tag -width /boot/defaults/loader.conf -compact
 .It Pa /boot/defaults/loader.conf
 default settings -- do not change this file.
 .It Pa /boot/loader.4th
 defines the commands used by loader to read and process
 .Nm .
 .It Pa /boot/loader.conf
 user defined settings.
 .It Pa /boot/loader.conf.local
 machine-specific settings for sites with a common loader.conf.
 .It Pa /boot/loader.rc
 contains the instructions to automatically process
 .Nm .
 .El
 .Sh SEE ALSO
 .Xr rc.conf 5 ,
 .Xr boot 8 ,
 .Xr loader 8 ,
 .Xr loader.4th 8
 .Sh HISTORY
 The file
 .Nm
 first appeared in
 .Fx 3.2 .
 .Sh AUTHORS
 This manual page was written by
 .An Daniel C. Sobral Aq dcs@FreeBSD.org .
 .Sh BUGS
 The
 .Xr loader 8
 stops reading
 .Nm
 when it encounters a syntax error, so any options which are vital for
 booting a particular system (i.e.\&
 .Dq Va hw.ata.ata_dma Ns "=0" )
 should precede any experimental additions to
 .Nm .
Index: projects/release-pkg/sys/boot/zfs/zfs.c
===================================================================
--- projects/release-pkg/sys/boot/zfs/zfs.c	(revision 293335)
+++ projects/release-pkg/sys/boot/zfs/zfs.c	(revision 293336)
@@ -1,860 +1,860 @@
 /*-
  * Copyright (c) 2007 Doug Rabson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	$FreeBSD$
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  *	Stand-alone file reading package.
  */
 
 #include <sys/disk.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/queue.h>
 #include <part.h>
 #include <stddef.h>
 #include <stdarg.h>
 #include <string.h>
 #include <stand.h>
 #include <bootstrap.h>
 
 #include "libzfs.h"
 
 #include "zfsimpl.c"
 
 /* Define the range of indexes to be populated with ZFS Boot Environments */
 #define		ZFS_BE_FIRST	4
 #define		ZFS_BE_LAST	8
 
 static int	zfs_open(const char *path, struct open_file *f);
 static int	zfs_write(struct open_file *f, void *buf, size_t size, size_t *resid);
 static int	zfs_close(struct open_file *f);
 static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
 static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
 static int	zfs_stat(struct open_file *f, struct stat *sb);
 static int	zfs_readdir(struct open_file *f, struct dirent *d);
 
 struct devsw zfs_dev;
 
 struct fs_ops zfs_fsops = {
 	"zfs",
 	zfs_open,
 	zfs_close,
 	zfs_read,
 	zfs_write,
 	zfs_seek,
 	zfs_stat,
 	zfs_readdir
 };
 
 /*
  * In-core open file.
  */
 struct file {
 	off_t		f_seekp;	/* seek pointer */
 	dnode_phys_t	f_dnode;
 	uint64_t	f_zap_type;	/* zap type for readdir */
 	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
 	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
 };
 
 static int	zfs_env_index;
 static int	zfs_env_count;
 
 SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
 struct zfs_be_list *zfs_be_headp;
 struct zfs_be_entry {
 	const char *name;
 	SLIST_ENTRY(zfs_be_entry) entries;
 } *zfs_be, *zfs_be_tmp;
 
 /*
  * Open a file.
  */
 static int
 zfs_open(const char *upath, struct open_file *f)
 {
 	struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
 	struct file *fp;
 	int rc;
 
 	if (f->f_dev != &zfs_dev)
 		return (EINVAL);
 
 	/* allocate file system specific data structure */
 	fp = malloc(sizeof(struct file));
 	bzero(fp, sizeof(struct file));
 	f->f_fsdata = (void *)fp;
 
 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
 	fp->f_seekp = 0;
 	if (rc) {
 		f->f_fsdata = NULL;
 		free(fp);
 	}
 	return (rc);
 }
 
 static int
 zfs_close(struct open_file *f)
 {
 	struct file *fp = (struct file *)f->f_fsdata;
 
 	dnode_cache_obj = 0;
 	f->f_fsdata = (void *)0;
 	if (fp == (struct file *)0)
 		return (0);
 
 	free(fp);
 	return (0);
 }
 
 /*
  * Copy a portion of a file into kernel memory.
  * Cross block boundaries when necessary.
  */
 static int
 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
 {
 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
 	struct file *fp = (struct file *)f->f_fsdata;
 	struct stat sb;
 	size_t n;
 	int rc;
 
 	rc = zfs_stat(f, &sb);
 	if (rc)
 		return (rc);
 	n = size;
 	if (fp->f_seekp + n > sb.st_size)
 		n = sb.st_size - fp->f_seekp;
-	
+
 	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
 	if (rc)
 		return (rc);
 
 	if (0) {
 	    int i;
 	    for (i = 0; i < n; i++)
 		putchar(((char*) start)[i]);
 	}
 	fp->f_seekp += n;
 	if (resid)
 		*resid = size - n;
 
 	return (0);
 }
 
 /*
  * Don't be silly - the bootstrap has no business writing anything.
  */
 static int
 zfs_write(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
 {
 
 	return (EROFS);
 }
 
 static off_t
 zfs_seek(struct open_file *f, off_t offset, int where)
 {
 	struct file *fp = (struct file *)f->f_fsdata;
 
 	switch (where) {
 	case SEEK_SET:
 		fp->f_seekp = offset;
 		break;
 	case SEEK_CUR:
 		fp->f_seekp += offset;
 		break;
 	case SEEK_END:
 	    {
 		struct stat sb;
 		int error;
 
 		error = zfs_stat(f, &sb);
 		if (error != 0) {
 			errno = error;
 			return (-1);
 		}
 		fp->f_seekp = sb.st_size - offset;
 		break;
 	    }
 	default:
 		errno = EINVAL;
 		return (-1);
 	}
 	return (fp->f_seekp);
 }
 
 static int
 zfs_stat(struct open_file *f, struct stat *sb)
 {
 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
 	struct file *fp = (struct file *)f->f_fsdata;
 
 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
 }
 
 /*
  * Return the next entry of an open directory in *d.
  *
  * The directory contents are a ZAP object read through dnode_read().
  * fp->f_seekp doubles as the iteration cursor: for a micro ZAP it is a
  * byte offset into the block; for a fat ZAP the bits above the block size
  * select the leaf block and the low bits are a chunk index in that leaf.
  *
  * Returns 0 when an entry was stored in *d, ENOENT at end of directory,
  * ENOTDIR if the file is not a directory, ENOMEM if the leaf buffer cannot
  * be allocated, or the error reported by zfs_stat()/dnode_read().
  */
 static int
 zfs_readdir(struct open_file *f, struct dirent *d)
 {
 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
 	struct file *fp = (struct file *)f->f_fsdata;
 	mzap_ent_phys_t mze;
 	struct stat sb;
 	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
 	int rc;
 
 	rc = zfs_stat(f, &sb);
 	if (rc)
 		return (rc);
 	if (!S_ISDIR(sb.st_mode))
 		return (ENOTDIR);
 
 	/*
 	 * If this is the first read, get the zap type.
 	 */
 	if (fp->f_seekp == 0) {
 		rc = dnode_read(spa, &fp->f_dnode,
 				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
 		if (rc)
 			return (rc);
 
 		if (fp->f_zap_type == ZBT_MICRO) {
 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
 		} else {
 			rc = dnode_read(spa, &fp->f_dnode,
 					offsetof(zap_phys_t, zap_num_leafs),
 					&fp->f_num_leafs,
 					sizeof(fp->f_num_leafs));
 			if (rc)
 				return (rc);
 
 			/*
 			 * Allocate the leaf buffer before moving the cursor
 			 * so that a failed allocation leaves f_seekp at 0
 			 * and the next call starts over instead of using a
 			 * NULL leaf.
 			 */
 			fp->f_zap_leaf = (zap_leaf_phys_t *)malloc(bsize);
 			if (fp->f_zap_leaf == NULL)
 				return (ENOMEM);
 			fp->f_seekp = bsize;
 			rc = dnode_read(spa, &fp->f_dnode,
 					fp->f_seekp,
 					fp->f_zap_leaf,
 					bsize);
 			if (rc)
 				return (rc);
 		}
 	}
 
 	if (fp->f_zap_type == ZBT_MICRO) {
 	mzap_next:
 		if (fp->f_seekp >= bsize)
 			return (ENOENT);
 
 		rc = dnode_read(spa, &fp->f_dnode,
 				fp->f_seekp, &mze, sizeof(mze));
 		if (rc)
 			return (rc);
 		fp->f_seekp += sizeof(mze);
 
 		/* Slots with an empty name are skipped. */
 		if (!mze.mze_name[0])
 			goto mzap_next;
 
 		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
 		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
 		strcpy(d->d_name, mze.mze_name);
 		d->d_namlen = strlen(d->d_name);
 		return (0);
 	} else {
 		zap_leaf_t zl;
 		zap_leaf_chunk_t *zc, *nc;
 		int chunk;
 		size_t namelen;
 		char *p;
 		uint64_t value;
 
 		/*
 		 * Initialise this so we can use the ZAP size
 		 * calculating macros.
 		 */
 		zl.l_bs = ilog2(bsize);
 		zl.l_phys = fp->f_zap_leaf;
 
 		/*
 		 * Figure out which chunk we are currently looking at
 		 * and consider seeking to the next leaf. We use the
 		 * low bits of f_seekp as a simple chunk index.
 		 */
 	fzap_next:
 		chunk = fp->f_seekp & (bsize - 1);
 		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
 			fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize;
 			chunk = 0;
 
 			/*
 			 * Check for EOF and read the new leaf.
 			 */
 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
 				return (ENOENT);
 
 			rc = dnode_read(spa, &fp->f_dnode,
 					fp->f_seekp,
 					fp->f_zap_leaf,
 					bsize);
 			if (rc)
 				return (rc);
 		}
 
 		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
 		fp->f_seekp++;
 		/* Only entry chunks start a directory entry. */
 		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
 			goto fzap_next;
 
 		/* Clamp the name so it always fits in d->d_name. */
 		namelen = zc->l_entry.le_name_numints;
 		if (namelen > sizeof(d->d_name))
 			namelen = sizeof(d->d_name);
 
 		/*
 		 * Paste the name back together.
 		 */
 		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
 		p = d->d_name;
 		while (namelen > 0) {
 			int len;
 			len = namelen;
 			if (len > ZAP_LEAF_ARRAY_BYTES)
 				len = ZAP_LEAF_ARRAY_BYTES;
 			memcpy(p, nc->l_array.la_array, len);
 			p += len;
 			namelen -= len;
 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
 		}
 		d->d_name[sizeof(d->d_name) - 1] = 0;
 
 		/*
 		 * Assume the first eight bytes of the value are
 		 * a uint64_t.
 		 */
 		value = fzap_leaf_value(&zl, zc);
 
 		d->d_fileno = ZFS_DIRENT_OBJ(value);
 		d->d_type = ZFS_DIRENT_TYPE(value);
 		d->d_namlen = strlen(d->d_name);
 
 		return (0);
 	}
 }
 
 /*
  * Read callback handed to the vdev layer: read "size" bytes at byte offset
  * "offset" into "buf" from the file descriptor carried in "priv".
  *
  * Returns 0 only when the whole request was satisfied, and EIO when the
  * seek fails or the read fails or comes up short.  "vdev" is not used.
  */
 static int
 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t size)
 {
 	ssize_t got;
 	int fd;
 
 	fd = (uintptr_t) priv;
 	/* After a failed seek the read would fetch data from the wrong place. */
 	if (lseek(fd, offset, SEEK_SET) == -1)
 		return (EIO);
 	got = read(fd, buf, size);
 	if (got < 0 || (size_t)got != size)
 		return (EIO);
 	return (0);
 }
 
 /*
  * Device-switch init hook.  Calls zfs_init(), runs the architecture's ZFS
  * probe hook, then walks zfs_pools and unlinks every pool for which
  * zfs_spa_init() returns non-zero.
  *
  * Returns ENXIO when no probe hook is installed, 0 otherwise.
  */
 static int
 zfs_dev_init(void)
 {
 	spa_t *cur, *following, *last_good;
 
 	zfs_init();
 	if (archsw.arch_zfs_probe == NULL)
 		return (ENXIO);
 	archsw.arch_zfs_probe();
 
 	/*
 	 * The list is singly linked, so remember the last pool that was
 	 * kept in order to unlink the element after it.
 	 */
 	last_good = NULL;
 	for (cur = STAILQ_FIRST(&zfs_pools); cur != NULL; cur = following) {
 		following = STAILQ_NEXT(cur, spa_link);
 		if (zfs_spa_init(cur) == 0) {
 			last_good = cur;
 			continue;
 		}
 		if (last_good != NULL)
 			STAILQ_REMOVE_AFTER(&zfs_pools, last_good, spa_link);
 		else
 			STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
 	}
 	return (0);
 }
 
 /*
  * State shared between zfs_probe_dev(), zfs_probe_partition() and the
  * zfs_diskread() callback while one device is being probed.
  */
 struct zfs_probe_args {
 	int		fd;		/* descriptor of the device being read */
 	const char	*devname;	/* name the descriptor was opened with */
 	uint64_t	*pool_guid;	/* passed on to zfs_probe(); may be NULL */
 	uint16_t	secsz;		/* multiplier applied to block counts/offsets */
 };
 
 /*
  * Read callback given to ptable_open().  "arg" is the struct
  * zfs_probe_args of the device being probed; the block count and block
  * offset are multiplied by its secsz and the request is passed on to
  * vdev_read() on the probe's descriptor.  Returns vdev_read()'s result.
  */
 static int
 zfs_diskread(void *arg, void *buf, size_t blocks, off_t offset)
 {
 	struct zfs_probe_args *probe = (struct zfs_probe_args *)arg;
 	void *priv = (void *)(uintptr_t)probe->fd;
 
 	return (vdev_read(NULL, priv, offset * probe->secsz, buf,
 	    blocks * probe->secsz));
 }
 
 static int
 zfs_probe(int fd, uint64_t *pool_guid)
 {
 	spa_t *spa;
 	int ret;
 
 	ret = vdev_probe(vdev_read, (void *)(uintptr_t)fd, &spa);
 	if (ret == 0 && pool_guid != NULL)
 		*pool_guid = spa->spa_guid;
 	return (ret);
 }
 
 /*
  * ptable_iterate() callback: probe one partition of the device described
  * by "arg" (a struct zfs_probe_args) for a ZFS pool.
  *
  * The partition's device name is the parent name with its last character
  * removed, followed by "partname" and ':'.  Partitions of type
  * PART_FREEBSD that hold no pool themselves are opened as a nested
  * partition table and probed recursively.  Names that do not fit the
  * local buffer are skipped.
  */
 static void
 zfs_probe_partition(void *arg, const char *partname,
     const struct ptable_entry *part)
 {
 	struct zfs_probe_args *ppa, pa;
 	struct ptable *table;
 	char devname[32];
 	size_t baselen;
 	int ret;
 
 	/* Probe only freebsd-zfs and freebsd partitions */
 	if (part->type != PART_FREEBSD &&
 	    part->type != PART_FREEBSD_ZFS)
 		return;
 
 	ppa = (struct zfs_probe_args *)arg;
 	/*
 	 * Build the name in a single bounded call.  Formatting devname into
 	 * itself is undefined behaviour, and an unbounded copy could run
 	 * past the 32-byte buffer.
 	 */
 	baselen = strlen(ppa->devname);
 	if (baselen > 0)
 		baselen--;
 	if (baselen >= sizeof(devname))
 		return;
 	ret = snprintf(devname, sizeof(devname), "%.*s%s:", (int)baselen,
 	    ppa->devname, partname);
 	if (ret < 0 || (size_t)ret >= sizeof(devname))
 		return;
 	pa.fd = open(devname, O_RDONLY);
 	if (pa.fd == -1)
 		return;
 	ret = zfs_probe(pa.fd, ppa->pool_guid);
 	/*
 	 * NOTE(review): the descriptor is left open when a pool is found;
 	 * presumably the vdev layer keeps reading through it - confirm.
 	 */
 	if (ret == 0)
 		return;
 	/* Do we have BSD label here? */
 	if (part->type == PART_FREEBSD) {
 		pa.devname = devname;
 		pa.pool_guid = ppa->pool_guid;
 		pa.secsz = ppa->secsz;
 		table = ptable_open(&pa, part->end - part->start + 1,
 		    ppa->secsz, zfs_diskread);
 		if (table != NULL) {
 			ptable_iterate(table, &pa, zfs_probe_partition);
 			ptable_close(table);
 		}
 	}
 	close(pa.fd);
 }
 
 /*
  * Probe device "devname" for ZFS pools: first the device as a whole, then,
  * if that finds nothing, each partition of its partition table via
  * zfs_probe_partition().  "pool_guid" is passed on to zfs_probe().
  *
  * Returns ENXIO if the device cannot be opened and 0 if the whole-device
  * probe finds a pool.  Otherwise the '-' side of this hunk always returns
  * 0, while the '+' side returns the result of the last ioctl() attempted.
  */
 int
 zfs_probe_dev(const char *devname, uint64_t *pool_guid)
 {
 	struct ptable *table;
 	struct zfs_probe_args pa;
 	off_t mediasz;
 	int ret;
 
 	pa.fd = open(devname, O_RDONLY);
 	if (pa.fd == -1)
 		return (ENXIO);
 	/* Probe the whole disk */
 	ret = zfs_probe(pa.fd, pool_guid);
 	/*
 	 * NOTE(review): the descriptor is not closed on this path;
 	 * presumably the vdev layer keeps reading through it - confirm.
 	 */
 	if (ret == 0)
 		return (0);
 	/* Probe each partition */
 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
 	/*
 	 * NOTE(review): pa.secsz is a uint16_t; confirm that
 	 * DIOCGSECTORSIZE stores an object of that width.
 	 */
 	if (ret == 0)
 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
 	if (ret == 0) {
 		pa.devname = devname;
 		pa.pool_guid = pool_guid;
 		/* The table size is given in sectors. */
 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
 		    zfs_diskread);
 		if (table != NULL) {
 			ptable_iterate(table, &pa, zfs_probe_partition);
 			ptable_close(table);
 		}
 	}
 	close(pa.fd);
-	return (0);
+	return (ret);
 }
 
 /*
  * Print information about ZFS pools.
  *
  * With "verbose" set the work is delegated to spa_all_status().  Otherwise
  * one "    zfs:<pool>" line per entry of zfs_pools is sent to the pager.
  */
 static void
 zfs_dev_print(int verbose)
 {
 	spa_t *spa;
 	char line[80];
 
 	if (verbose) {
 		spa_all_status();
 		return;
 	}
 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
 		/* Bounded: a long pool name must not overrun line[]. */
 		snprintf(line, sizeof(line), "    zfs:%s\n", spa->spa_name);
 		pager_output(line);
 	}
 }
 
 /*
  * Attempt to open the pool described by (dev) for use by (f).
  *
  * The single variadic argument is a struct zfs_devdesc pointer.  A
  * pool_guid of 0 selects the first pool in zfs_pools.  On success a newly
  * allocated struct zfsmount is stored in f->f_devdata and the descriptor
  * is freed.
  *
  * Returns 0 on success, ENXIO if no matching pool exists, ENOMEM if the
  * mount cannot be allocated, the zfs_mount() error, or EIO when the object
  * set is not of type DMU_OST_ZFS.
  */
 static int
 zfs_dev_open(struct open_file *f, ...)
 {
 	va_list		args;
 	struct zfs_devdesc	*dev;
 	struct zfsmount	*mount;
 	spa_t		*spa;
 	int		rv;
 
 	va_start(args, f);
 	dev = va_arg(args, struct zfs_devdesc *);
 	va_end(args);
 
 	if (dev->pool_guid == 0)
 		spa = STAILQ_FIRST(&zfs_pools);
 	else
 		spa = spa_find_by_guid(dev->pool_guid);
 	if (!spa)
 		return (ENXIO);
 	mount = malloc(sizeof(*mount));
 	/* zfs_mount() writes through the pointer, so it must be valid. */
 	if (mount == NULL)
 		return (ENOMEM);
 	rv = zfs_mount(spa, dev->root_guid, mount);
 	if (rv != 0) {
 		free(mount);
 		return (rv);
 	}
 	if (mount->objset.os_type != DMU_OST_ZFS) {
 		printf("Unexpected object set type %ju\n",
 		    (uintmax_t)mount->objset.os_type);
 		free(mount);
 		return (EIO);
 	}
 	f->f_devdata = mount;
 	/*
 	 * NOTE(review): dev is released only on the success path;
 	 * presumably the caller frees it after a failure - confirm.
 	 */
 	free(dev);
 	return (0);
 }
 
 /*
  * Device-switch close hook: release the data hanging off f->f_devdata and
  * clear the pointer.  Always returns 0.
  */
 static int
 zfs_dev_close(struct open_file *f)
 {
 	void *mount = f->f_devdata;
 
 	f->f_devdata = NULL;
 	free(mount);
 	return (0);
 }
 
 /*
  * Device-switch strategy hook.  It performs no I/O: every request is
  * answered with ENOSYS and the arguments are ignored.
  */
 static int
 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size,
     char *buf, size_t *rsize)
 {
 	(void)devdata;
 	(void)rw;
 	(void)dblk;
 	(void)size;
 	(void)buf;
 	(void)rsize;
 
 	return (ENOSYS);
 }
 
 /*
  * Device switch entry for the "zfs" device, wiring the hooks defined in
  * this file into the loader's device table.
  */
 struct devsw zfs_dev = {
 	/* identity */
 	.dv_name = "zfs",
 	.dv_type = DEVT_ZFS,
 	/* lifecycle */
 	.dv_init = zfs_dev_init,
 	.dv_open = zfs_dev_open,
 	.dv_close = zfs_dev_close,
 	.dv_cleanup = NULL,
 	/* operations */
 	.dv_strategy = zfs_dev_strategy,
 	.dv_ioctl = noioctl,
 	.dv_print = zfs_dev_print,
 };
 
 /*
  * Parse a device specification of the form ":pool[/dataset]:rest" (the
  * text following the device name) and fill in *dev.
  *
  * The pool name is looked up with spa_find_by_name() and the dataset name
  * (empty when absent) with zfs_lookup_dataset().  When "path" is not NULL
  * it receives a pointer to the text after the closing ':'.
  *
  * Returns 0 on success; EINVAL if the specification is malformed or a
  * name does not fit its ZFS_MAXNAMELEN buffer; ENXIO if the pool is
  * unknown; or the zfs_lookup_dataset() error.
  */
 int
 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
 {
 	static char	rootname[ZFS_MAXNAMELEN];
 	static char	poolname[ZFS_MAXNAMELEN];
 	spa_t		*spa;
 	const char	*end;
 	const char	*np;
 	const char	*sep;
 	int		rv;
 
 	np = devspec;
 	if (*np != ':')
 		return (EINVAL);
 	np++;
 	end = strchr(np, ':');
 	if (end == NULL)
 		return (EINVAL);
 	/* A '/' only separates pool and dataset if it precedes the ':'. */
 	sep = strchr(np, '/');
 	if (sep == NULL || sep >= end)
 		sep = end;
 	/* Reject names that would overrun the static buffers. */
 	if ((size_t)(sep - np) >= sizeof(poolname))
 		return (EINVAL);
 	memcpy(poolname, np, sep - np);
 	poolname[sep - np] = '\0';
 	if (sep < end) {
 		sep++;
 		if ((size_t)(end - sep) >= sizeof(rootname))
 			return (EINVAL);
 		memcpy(rootname, sep, end - sep);
 		rootname[end - sep] = '\0';
 	}
 	else
 		rootname[0] = '\0';
 
 	spa = spa_find_by_name(poolname);
 	if (!spa)
 		return (ENXIO);
 	dev->pool_guid = spa->spa_guid;
 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
 	if (rv != 0)
 		return (rv);
 	if (path != NULL)
 		*path = (*end == '\0') ? end : end + 1;
 	dev->d_dev = &zfs_dev;
 	dev->d_type = zfs_dev.dv_type;
 	return (0);
 }
 
 /*
  * Format a ZFS device descriptor as "<devname>:<pool>[/<dataset>]:".
  *
  * "vdev" is a struct zfs_devdesc pointer.  A pool_guid of 0 is replaced by
  * the GUID of the first pool in zfs_pools, and a root_guid of 0 by the
  * value zfs_get_root() supplies.  The result lives in a static buffer that
  * the next call overwrites.  An empty string is returned when the
  * descriptor is not of type DEVT_ZFS or when any lookup fails.
  */
 char *
 zfs_fmtdev(void *vdev)
 {
 	static char		rootname[ZFS_MAXNAMELEN];
 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
 	spa_t			*spa;
 
 	buf[0] = '\0';
 	if (dev->d_type != DEVT_ZFS)
 		return (buf);
 
 	if (dev->pool_guid == 0) {
 		spa = STAILQ_FIRST(&zfs_pools);
 		/* The pool list may be empty; the check below reports it. */
 		if (spa != NULL)
 			dev->pool_guid = spa->spa_guid;
 	} else
 		spa = spa_find_by_guid(dev->pool_guid);
 	if (spa == NULL) {
 		printf("ZFS: can't find pool by guid\n");
 		return (buf);
 	}
 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
 		printf("ZFS: can't find root filesystem\n");
 		return (buf);
 	}
 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
 		printf("ZFS: can't find filesystem by guid\n");
 		return (buf);
 	}
 
 	if (rootname[0] == '\0')
 		snprintf(buf, sizeof(buf), "%s:%s:", dev->d_dev->dv_name,
 		    spa->spa_name);
 	else
 		snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->d_dev->dv_name,
 		    spa->spa_name, rootname);
 	return (buf);
 }
 
 /*
  * List the dataset named by "name", given as "pool" or "pool/dataset".
  *
  * Returns EINVAL if the pool part does not fit in ZFS_MAXNAMELEN, ENXIO if
  * the pool is unknown, the zfs_lookup_dataset() error if the dataset
  * cannot be found, and otherwise the result of zfs_list_dataset().
  */
 int
 zfs_list(const char *name)
 {
 	static char	poolname[ZFS_MAXNAMELEN];
 	uint64_t	objid;
 	spa_t		*spa;
 	const char	*dsname;
 	size_t		len;
 	int		rv;
 
 	/* Everything up to the first '/' is the pool name. */
 	len = strlen(name);
 	dsname = strchr(name, '/');
 	if (dsname != NULL) {
 		len = dsname - name;
 		dsname++;
 	} else
 		dsname = "";
 	/* Reject names that would overrun the static buffer. */
 	if (len >= sizeof(poolname))
 		return (EINVAL);
 	memcpy(poolname, name, len);
 	poolname[len] = '\0';
 
 	spa = spa_find_by_name(poolname);
 	if (!spa)
 		return (ENXIO);
 	rv = zfs_lookup_dataset(spa, dsname, &objid);
 	if (rv != 0)
 		return (rv);
 
 	return (zfs_list_dataset(spa, objid));
 }
 
 /*
  * Rebuild the boot-environment menu variables for the dataset "name"
  * ("pool" or "pool/dataset").
  *
  * Sets zfs_be_root to "name" if it differs or is unset, collects the
  * datasets reported by zfs_callback_dataset() through zfs_belist_add(),
  * stores the page count in zfs_be_pages, resets zfs_be_currpage to "1"
  * when it exceeds that count, calls zfs_set_env() and finally frees the
  * collected list.
  *
  * Returns the zfs_callback_dataset() result on the normal path; ENOMEM if
  * a setenv() fails; EINVAL if the pool part does not fit in
  * ZFS_MAXNAMELEN; ENXIO if the pool is unknown; or the
  * zfs_lookup_dataset() error.
  */
 int
 zfs_bootenv(const char *name)
 {
 	static char	poolname[ZFS_MAXNAMELEN];
 	const char	*beroot, *pagestr, *dsname;
 	char		becount[4];
 	uint64_t	objid;
 	spa_t		*spa;
 	size_t		len;
 	int		rv, pages, perpage, currpage;
 
 	/* getenv() returns NULL for an unset variable; never strcmp() that. */
 	beroot = getenv("zfs_be_root");
 	if (beroot == NULL || strcmp(name, beroot) != 0) {
 		if (setenv("zfs_be_root", name, 1) != 0)
 			return (ENOMEM);
 	}
 
 	SLIST_INIT(&zfs_be_head);
 	zfs_env_count = 0;
 	len = strlen(name);
 	dsname = strchr(name, '/');
 	if (dsname != NULL) {
 		len = dsname - name;
 		dsname++;
 	} else
 		dsname = "";
 	/* Reject names that would overrun the static buffer. */
 	if (len >= sizeof(poolname))
 		return (EINVAL);
 	memcpy(poolname, name, len);
 	poolname[len] = '\0';
 
 	spa = spa_find_by_name(poolname);
 	if (!spa)
 		return (ENXIO);
 	rv = zfs_lookup_dataset(spa, dsname, &objid);
 	if (rv != 0)
 		return (rv);
 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
 
 	/* Calculate and store the number of pages of BEs */
 	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
 	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
 	snprintf(becount, sizeof(becount), "%d", pages);
 	if (setenv("zfs_be_pages", becount, 1) != 0) {
 		rv = ENOMEM;
 		goto out;
 	}
 
 	/* Roll over the page counter if it has exceeded the maximum */
 	pagestr = getenv("zfs_be_currpage");
 	if (pagestr != NULL) {
 		currpage = strtol(pagestr, NULL, 10);
 		if (currpage > pages &&
 		    setenv("zfs_be_currpage", "1", 1) != 0) {
 			rv = ENOMEM;
 			goto out;
 		}
 	}
 
 	/* Populate the menu environment variables */
 	zfs_set_env();
 
 out:
 	/* Clean up the SLIST of ZFS BEs, on the error exits as well */
 	while (!SLIST_EMPTY(&zfs_be_head)) {
 		zfs_be = SLIST_FIRST(&zfs_be_head);
 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
 		free(zfs_be);
 	}
 
 	return (rv);
 }
 
 /*
  * Callback used by zfs_bootenv(): record one boot environment name.
  *
  * A new entry holding the "name" pointer (the string is not copied) is
  * pushed onto the head of zfs_be_head and zfs_env_count is incremented.
  * Returns 0 on success and ENOMEM if the entry cannot be allocated.
  */
 int
 zfs_belist_add(const char *name)
 {
 
 	/* Add the boot environment to the head of the SLIST */
 	zfs_be = malloc(sizeof(struct zfs_be_entry));
 	if (zfs_be == NULL)
 		return (ENOMEM);
 	zfs_be->name = name;
 	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
 	zfs_env_count++;
 
 	return (0);
 }
 
 /*
  * Publish one page of the collected boot environments as menu variables.
  *
  * The page number comes from zfs_be_currpage (1 when unset).  Entries of
  * zfs_be_head that belong to earlier pages are skipped; each following
  * entry fills the slot numbered zfs_env_index with bootenvmenu_caption[],
  * bootenvansi_caption[], bootenvmenu_command[] and bootenv_root[].  Slots
  * left over up to ZFS_BE_LAST have those variables unset.
  *
  * Returns 1 if zfs_be_root is unset, otherwise the result of the last
  * setenv() attempted (0 if none was needed).
  */
 int
 zfs_set_env(void)
 {
 	char envname[32], envval[256];
 	char *beroot, *pagenum;
 	int rv, page, seen;
 
 	beroot = getenv("zfs_be_root");
 	if (beroot == NULL)
 		return (1);
 
 	pagenum = getenv("zfs_be_currpage");
 	if (pagenum == NULL)
 		page = 1;
 	else
 		page = strtol(pagenum, NULL, 10);
 
 	seen = 0;
 	rv = 0;
 	zfs_env_index = ZFS_BE_FIRST;
 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
 		/* Pass over the entries that belong to earlier pages. */
 		if (seen < ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
 			seen++;
 			continue;
 		}
 
 		/* Both captions carry the bare environment name. */
 		snprintf(envval, sizeof(envval), "%s", zfs_be->name);
 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]",
 		    zfs_env_index);
 		if ((rv = setenv(envname, envval, 1)) != 0)
 			break;
 
 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]",
 		    zfs_env_index);
 		if ((rv = setenv(envname, envval, 1)) != 0)
 			break;
 
 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]",
 		    zfs_env_index);
 		if ((rv = setenv(envname, "set_bootenv", 1)) != 0)
 			break;
 
 		snprintf(envname, sizeof(envname), "bootenv_root[%d]",
 		    zfs_env_index);
 		snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot,
 		    zfs_be->name);
 		if ((rv = setenv(envname, envval, 1)) != 0)
 			break;
 
 		/* Stop once the last slot of the page has been filled. */
 		if (++zfs_env_index > ZFS_BE_LAST)
 			break;
 	}
 
 	/* Clear whatever slots this page did not use. */
 	while (zfs_env_index <= ZFS_BE_LAST) {
 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]",
 		    zfs_env_index);
 		(void)unsetenv(envname);
 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]",
 		    zfs_env_index);
 		(void)unsetenv(envname);
 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]",
 		    zfs_env_index);
 		(void)unsetenv(envname);
 		snprintf(envname, sizeof(envname), "bootenv_root[%d]",
 		    zfs_env_index);
 		(void)unsetenv(envname);
 		zfs_env_index++;
 	}
 
 	return (rv);
 }
\ No newline at end of file
Index: projects/release-pkg/sys/boot
===================================================================
--- projects/release-pkg/sys/boot	(revision 293335)
+++ projects/release-pkg/sys/boot	(revision 293336)

Property changes on: projects/release-pkg/sys/boot
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/boot:r289091-289384,293171-293335
Index: projects/release-pkg/sys/cddl/boot/zfs/lz4.c
===================================================================
--- projects/release-pkg/sys/cddl/boot/zfs/lz4.c	(revision 293335)
+++ projects/release-pkg/sys/cddl/boot/zfs/lz4.c	(revision 293336)
@@ -1,327 +1,327 @@
 /*
  * LZ4 - Fast LZ compression algorithm
  * Header File
  * Copyright (C) 2011-2013, Yann Collet.
  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  *     * Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above
  * copyright notice, this list of conditions and the following disclaimer
  * in the documentation and/or other materials provided with the
  * distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * You can contact the author at :
  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
  * - LZ4 source repository : http://code.google.com/p/lz4/
  *
  * $FreeBSD$
  */
 
 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
 					    int isize, int maxOutputSize);
 
 /*
  * Decompress an LZ4 buffer that starts with a 4-byte length prefix.
  *
  * The prefix is read from s_start and passed through htonl(); it gives the
  * size of the compressed payload that follows it.  The payload is decoded
  * into d_start, which has room for d_len bytes.  "dummy" is unused.
  *
  * Returns 0 on success and non-zero if the prefix is larger than the
  * source buffer allows or the decoder reports an error.
  */
 /* ARGSUSED */
 static int
 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int dummy __unused)
 {
 	const uint8_t *src = s_start;
 	uint32_t bufsiz = htonl(*(uint32_t *)src);
 
 	/* invalid compressed buffer size encoded at start */
 	/*
 	 * NOTE(review): where int is 32 bits, bufsiz + 4 is evaluated as a
 	 * 32-bit unsigned sum, so a prefix within 4 of UINT32_MAX wraps
 	 * around and passes this check.
 	 */
 	if (bufsiz + 4 > s_len)
 		return (1);
 
 	/*
 	 * Returns 0 on success (decompression function returned non-negative)
 	 * and non-zero on failure (decompression function returned negative).
 	 */
-	return (LZ4_uncompress_unknownOutputSize(s_start + 4, d_start, bufsiz,
+	return (LZ4_uncompress_unknownOutputSize((const char *)s_start + 4, d_start, bufsiz,
 	    d_len) < 0);
 }
 
 /*
  * CPU Feature Detection
  */
 
 /* 32 or 64 bits ? */
 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
 	defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
 	defined(__LP64__) || defined(_LP64))
 #define	LZ4_ARCH64	1
 #else
 #define	LZ4_ARCH64	0
 #endif
 
 /*
  * Little Endian or Big Endian?
  * Note: overwrite the below #define if you know your architecture endianness.
  */
 #if BYTE_ORDER == BIG_ENDIAN
 #define	LZ4_BIG_ENDIAN	1
 #else
 	/*
 	 * Little Endian assumed. PDP Endian and other very rare endian formats
 	 * are unsupported.
 	 */
 #endif
 
 /*
  * Unaligned memory access is automatically enabled for "common" CPU,
  * such as x86. For other CPUs, the compiler will be more cautious, and
  * insert extra code to ensure aligned access is respected. If you know
  * your target CPU supports unaligned memory access, you may want to
  * force this option manually to improve performance
  */
 #if defined(__ARM_FEATURE_UNALIGNED)
 #define	LZ4_FORCE_UNALIGNED_ACCESS 1
 #endif
 
 /*
  * Compiler Options
  */
 #if __STDC_VERSION__ >= 199901L	/* C99 */
 /* "restrict" is a known keyword */
 #else
 /* Disable restrict */
 #define	restrict
 #endif
 
 #define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
 
 #define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) \
 	| (((x) & 0xffu) << 8)))
 
 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
 #define	expect(expr, value)    (__builtin_expect((expr), (value)))
 #else
 #define	expect(expr, value)    (expr)
 #endif
 
 #define	likely(expr)	expect((expr) != 0, 1)
 #define	unlikely(expr)	expect((expr) != 0, 0)
 
 /* Basic types */
 #define	BYTE	uint8_t
 #define	U16	uint16_t
 #define	U32	uint32_t
 #define	S32	int32_t
 #define	U64	uint64_t
 
 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
 #pragma pack(1)
 #endif
 
 typedef struct _U16_S {
 	U16 v;
 } U16_S;
 typedef struct _U32_S {
 	U32 v;
 } U32_S;
 typedef struct _U64_S {
 	U64 v;
 } U64_S;
 
 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
 #pragma pack()
 #endif
 
 #define	A64(x)	(((U64_S *)(x))->v)
 #define	A32(x)	(((U32_S *)(x))->v)
 #define	A16(x)	(((U16_S *)(x))->v)
 
 /*
  * Constants
  */
 #define	MINMATCH 4
 
 #define	COPYLENGTH 8
 #define	LASTLITERALS 5
 
 #define	ML_BITS 4
 #define	ML_MASK ((1U<<ML_BITS)-1)
 #define	RUN_BITS (8-ML_BITS)
 #define	RUN_MASK ((1U<<RUN_BITS)-1)
 
 /*
  * Architecture-specific macros
  */
 #if LZ4_ARCH64
 #define	STEPSIZE 8
 #define	UARCH U64
 #define	AARCH A64
 #define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
 #define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
 #define	HTYPE U32
 #define	INITBASE(base)		const BYTE* const base = ip
 #else
 #define	STEPSIZE 4
 #define	UARCH U32
 #define	AARCH A32
 #define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
 #define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
 #define	LZ4_SECURECOPY		LZ4_WILDCOPY
 #define	HTYPE const BYTE*
 #define	INITBASE(base)		const int base = 0
 #endif
 
 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
 	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
 #define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
 	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
 #else
 #define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
 #define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
 #endif
 
 /* Macros */
 #define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
 
 /* Decompression functions */
 
 /*
  * Decode the LZ4 data in source[0 .. isize) into dest, which has room for
  * maxOutputSize bytes.
  *
  * Each iteration of the main loop handles one sequence: a token byte, a
  * run of literals, a 16-bit little-endian back-reference offset and a
  * match that is copied from the output already produced.
  *
  * Returns the number of bytes written to dest on success.  On a malformed
  * or overlong stream it returns the negated offset into source at which
  * the problem was detected.
  */
 static int
 LZ4_uncompress_unknownOutputSize(const char *source,
     char *dest, int isize, int maxOutputSize)
 {
 	/* Local Variables */
 	const BYTE *restrict ip = (const BYTE *) source;
 	const BYTE *const iend = ip + isize;
 	const BYTE *restrict ref;
 
 	BYTE *restrict op = (BYTE *) dest;
 	BYTE *const oend = op + maxOutputSize;
 	BYTE *cpy;
 
 	/* Adjustment applied to ref after the 4 byte-wise copies below. */
 	size_t dec[] = { 0, 3, 2, 3, 0, 0, 0, 0 };
 
 	/* Main Loop */
 	while (ip < iend) {
 		BYTE token;
 		int length;
 
 		/* get runlength */
 		token = *ip++;
 		/* A high nibble of RUN_MASK means extra length bytes follow. */
 		if ((length = (token >> ML_BITS)) == RUN_MASK) {
 			int s = 255;
 			while ((ip < iend) && (s == 255)) {
 				s = *ip++;
 				length += s;
 			}
 		}
 		/* copy literals */
 		cpy = op + length;
 		/* Near either buffer end, use the exact-length memcpy path. */
 		if ((cpy > oend - COPYLENGTH) ||
 		    (ip + length > iend - COPYLENGTH)) {
 			if (cpy > oend)
 				/*
 				 * Error: request to write beyond destination
 				 * buffer.
 				 */
 				goto _output_error;
 			if (ip + length > iend)
 				/*
 				 * Error : request to read beyond source
 				 * buffer.
 				 */
 				goto _output_error;
 			memcpy(op, ip, length);
 			op += length;
 			ip += length;
 			if (ip < iend)
 				/* Error : LZ4 format violation */
 				goto _output_error;
 			/* Necessarily EOF, due to parsing restrictions. */
 			break;
 		}
 		/* The wild copy may overshoot cpy; rewind ip by the excess. */
 		LZ4_WILDCOPY(ip, op, cpy);
 		ip -= (op - cpy);
 		op = cpy;
 
 		/* get offset */
 		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
 		ip += 2;
 		if (ref < (BYTE * const) dest)
 			/*
 			 * Error: offset creates reference outside of
 			 * destination buffer.
 			 */
 			goto _output_error;
 
 		/* get matchlength */
 		if ((length = (token & ML_MASK)) == ML_MASK) {
 			while (ip < iend) {
 				int s = *ip++;
 				length += s;
 				if (s == 255)
 					continue;
 				break;
 			}
 		}
 		/* copy repeated sequence */
 		/* Source and destination closer than STEPSIZE: copy bytewise. */
 		if unlikely(op - ref < STEPSIZE) {
 #if LZ4_ARCH64
 			size_t dec2table[] = { 0, 0, 0, -1, 0, 1, 2, 3 };
 			size_t dec2 = dec2table[op - ref];
 #else
 			const int dec2 = 0;
 #endif
 			*op++ = *ref++;
 			*op++ = *ref++;
 			*op++ = *ref++;
 			*op++ = *ref++;
 			ref -= dec[op - ref];
 			A32(op) = A32(ref);
 			op += STEPSIZE - 4;
 			ref -= dec2;
 		} else {
 			LZ4_COPYSTEP(ref, op);
 		}
 		cpy = op + length - (STEPSIZE - 4);
 		if (cpy > oend - COPYLENGTH) {
 			if (cpy > oend)
 				/*
 				 * Error: request to write outside of
 				 * destination buffer.
 				 */
 				goto _output_error;
 			/* Wide copies up to the safe limit, then bytewise. */
 			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
 			while (op < cpy)
 				*op++ = *ref++;
 			op = cpy;
 			if (op == oend)
 				/*
 				 * Check EOF (should never happen, since last
 				 * 5 bytes are supposed to be literals).
 				 */
 				break;
 			continue;
 		}
 		LZ4_SECURECOPY(ref, op, cpy);
 		op = cpy;	/* correction */
 	}
 
 	/* end of decoding */
 	return (int)(((char *)op) - dest);
 
 	/* write overflow error detected */
 	_output_error:
 	return (int)(-(((char *)ip) - source));
 }
Index: projects/release-pkg/sys/conf/files
===================================================================
--- projects/release-pkg/sys/conf/files	(revision 293335)
+++ projects/release-pkg/sys/conf/files	(revision 293336)
@@ -1,4303 +1,4304 @@
 # $FreeBSD$
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
 # dependency lines other than the first are silently ignored.
 #
 acpi_quirks.h			optional acpi				   \
 	dependency	"$S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \
 	compile-with	"${AWK} -f $S/tools/acpi_quirks2h.awk $S/dev/acpica/acpi_quirks" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"acpi_quirks.h"
 #
 # The 'fdt_dtb_file' target covers an actual DTB file name, which is derived
 # from the specified source (DTS) file: <platform>.dts -> <platform>.dtb
 #
 fdt_dtb_file			optional fdt fdt_dtb_static \
 	compile-with "sh -c 'MACHINE=${MACHINE} $S/tools/fdt/make_dtb.sh $S ${FDT_DTS_FILE} ${.CURDIR}'" \
 	no-obj no-implicit-rule before-depend	\
 	clean		"${FDT_DTS_FILE:R}.dtb"
 fdt_static_dtb.h		optional fdt fdt_dtb_static \
 	compile-with "sh -c 'MACHINE=${MACHINE} $S/tools/fdt/make_dtbh.sh ${FDT_DTS_FILE} ${.CURDIR}'" \
 	dependency	"fdt_dtb_file" \
 	no-obj no-implicit-rule before-depend \
 	clean		"fdt_static_dtb.h"
 feeder_eq_gen.h			optional sound				   \
 	dependency	"$S/tools/sound/feeder_eq_mkfilter.awk"		   \
 	compile-with	"${AWK} -f $S/tools/sound/feeder_eq_mkfilter.awk -- ${FEEDER_EQ_PRESETS} > feeder_eq_gen.h" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"feeder_eq_gen.h"
 feeder_rate_gen.h		optional sound				   \
 	dependency	"$S/tools/sound/feeder_rate_mkfilter.awk"	   \
 	compile-with	"${AWK} -f $S/tools/sound/feeder_rate_mkfilter.awk -- ${FEEDER_RATE_PRESETS} > feeder_rate_gen.h" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"feeder_rate_gen.h"
 snd_fxdiv_gen.h			optional sound				   \
 	dependency	"$S/tools/sound/snd_fxdiv_gen.awk"		   \
 	compile-with	"${AWK} -f $S/tools/sound/snd_fxdiv_gen.awk -- > snd_fxdiv_gen.h" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"snd_fxdiv_gen.h"
 miidevs.h			optional miibus | mii			   \
 	dependency	"$S/tools/miidevs2h.awk $S/dev/mii/miidevs"	   \
 	compile-with	"${AWK} -f $S/tools/miidevs2h.awk $S/dev/mii/miidevs" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"miidevs.h"
 pccarddevs.h			standard				   \
 	dependency	"$S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \
 	compile-with	"${AWK} -f $S/tools/pccarddevs2h.awk $S/dev/pccard/pccarddevs" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"pccarddevs.h"
 teken_state.h		optional sc | vt				   \
 	dependency	"$S/teken/gensequences $S/teken/sequences" \
 	compile-with	"${AWK} -f $S/teken/gensequences $S/teken/sequences > teken_state.h" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"teken_state.h"
 usbdevs.h			optional usb				   \
 	dependency	"$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \
 	compile-with	"${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -h" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"usbdevs.h"
 usbdevs_data.h			optional usb				   \
 	dependency	"$S/tools/usbdevs2h.awk $S/dev/usb/usbdevs" \
 	compile-with	"${AWK} -f $S/tools/usbdevs2h.awk $S/dev/usb/usbdevs -d" \
 	no-obj no-implicit-rule before-depend				   \
 	clean		"usbdevs_data.h"
 cam/cam.c			optional scbus
 cam/cam_compat.c		optional scbus
 cam/cam_periph.c		optional scbus
 cam/cam_queue.c			optional scbus
 cam/cam_sim.c			optional scbus
 cam/cam_xpt.c			optional scbus
 cam/ata/ata_all.c		optional scbus
 cam/ata/ata_xpt.c		optional scbus
 cam/ata/ata_pmp.c		optional scbus
 cam/scsi/scsi_xpt.c		optional scbus
 cam/scsi/scsi_all.c		optional scbus
 cam/scsi/scsi_cd.c		optional cd
 cam/scsi/scsi_ch.c		optional ch
 cam/ata/ata_da.c		optional ada | da
 cam/ctl/ctl.c			optional ctl
 cam/ctl/ctl_backend.c		optional ctl
 cam/ctl/ctl_backend_block.c	optional ctl
 cam/ctl/ctl_backend_ramdisk.c	optional ctl
 cam/ctl/ctl_cmd_table.c		optional ctl
 cam/ctl/ctl_frontend.c		optional ctl
 cam/ctl/ctl_frontend_cam_sim.c	optional ctl
 cam/ctl/ctl_frontend_ioctl.c	optional ctl
 cam/ctl/ctl_frontend_iscsi.c	optional ctl
 cam/ctl/ctl_ha.c		optional ctl
 cam/ctl/ctl_scsi_all.c		optional ctl
 cam/ctl/ctl_tpc.c		optional ctl
 cam/ctl/ctl_tpc_local.c		optional ctl
 cam/ctl/ctl_error.c		optional ctl
 cam/ctl/ctl_util.c		optional ctl
 cam/ctl/scsi_ctl.c		optional ctl
 cam/scsi/scsi_da.c		optional da
 cam/scsi/scsi_low.c		optional ct | ncv | nsp | stg
 cam/scsi/scsi_pass.c		optional pass
 cam/scsi/scsi_pt.c		optional pt
 cam/scsi/scsi_sa.c		optional sa
 cam/scsi/scsi_enc.c		optional ses
 cam/scsi/scsi_enc_ses.c		optional ses
 cam/scsi/scsi_enc_safte.c	optional ses
 cam/scsi/scsi_sg.c		optional sg
 cam/scsi/scsi_targ_bh.c		optional targbh
 cam/scsi/scsi_target.c		optional targ
 cam/scsi/smp_all.c		optional scbus
 # shared between zfs and dtrace
 cddl/compat/opensolaris/kern/opensolaris.c		optional zfs | dtrace compile-with "${CDDL_C}"
 cddl/compat/opensolaris/kern/opensolaris_cmn_err.c	optional zfs | dtrace compile-with "${CDDL_C}"
 cddl/compat/opensolaris/kern/opensolaris_kmem.c		optional zfs | dtrace compile-with "${CDDL_C}"
 cddl/compat/opensolaris/kern/opensolaris_misc.c		optional zfs | dtrace compile-with "${CDDL_C}"
 cddl/compat/opensolaris/kern/opensolaris_sunddi.c	optional zfs | dtrace compile-with "${CDDL_C}"
 cddl/compat/opensolaris/kern/opensolaris_taskq.c	optional zfs | dtrace compile-with "${CDDL_C}"
 # zfs specific
 cddl/compat/opensolaris/kern/opensolaris_acl.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_dtrace.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_kobj.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_kstat.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_lookup.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_policy.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_string.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_sysevent.c			optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_uio.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_vfs.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_vm.c				optional zfs compile-with "${ZFS_C}"
 cddl/compat/opensolaris/kern/opensolaris_zone.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/acl/acl_common.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/avl/avl.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/nvpair/opensolaris_fnvpair.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/nvpair/opensolaris_nvpair.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/nvpair/opensolaris_nvpair_alloc_fixed.c	optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/unicode/u8_textprep.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfeature_common.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_comutil.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_deleg.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_fletcher.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_ioctl_compat.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_namecheck.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zfs_prop.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zpool_prop.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/common/zfs/zprop_common.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/gfs.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/vnode.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/blkptr.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/bplist.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/ddt.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/ddt_zap.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c			optional zfs compile-with "${ZFS_C}" \
 	warning "kernel contains CDDL licensed ZFS filesystem"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_bookmark.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deleg.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_prop.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_userhold.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/gzip.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/lz4.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/lzjb.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/multilist.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/range_tree.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/refcount.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/rrwlock.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/sha256.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/spa_config.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/spa_errlog.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/space_reftree.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/trim_map.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/uberblock.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/unique.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_cache.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_label.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_missing.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_raidz.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_root.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zap_leaf.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfeature.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_byteswap.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_debug.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fm.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_fuid.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_rlock.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_sa.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zio_checksum.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zio_compress.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zle.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zrlock.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/os/callb.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/os/fm.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/os/list.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/os/nvpair_alloc_system.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/adler32.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/deflate.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/inffast.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/inflate.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/inftrees.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/opensolaris_crc32.c		optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/trees.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/zmod.c				optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/zmod_subr.c			optional zfs compile-with "${ZFS_C}"
 cddl/contrib/opensolaris/uts/common/zmod/zutil.c			optional zfs compile-with "${ZFS_C}"
 # dtrace specific
 cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c	optional dtrace compile-with "${DTRACE_C}" \
 							warning "kernel contains CDDL licensed DTRACE"
 cddl/dev/dtmalloc/dtmalloc.c		optional dtmalloc        | dtraceall compile-with "${CDDL_C}"
 cddl/dev/profile/profile.c		optional dtrace_profile  | dtraceall compile-with "${CDDL_C}"
 cddl/dev/sdt/sdt.c			optional dtrace_sdt      | dtraceall compile-with "${CDDL_C}"
 cddl/dev/fbt/fbt.c			optional dtrace_fbt      | dtraceall compile-with "${FBT_C}"
 cddl/dev/systrace/systrace.c		optional dtrace_systrace | dtraceall compile-with "${CDDL_C}"
 cddl/dev/prototype.c			optional dtrace_prototype | dtraceall compile-with "${CDDL_C}"
 fs/nfsclient/nfs_clkdtrace.c		optional dtnfscl nfscl   | dtraceall nfscl compile-with "${CDDL_C}"
 compat/cloudabi/cloudabi_clock.c	optional compat_cloudabi64
 compat/cloudabi/cloudabi_errno.c	optional compat_cloudabi64
 compat/cloudabi/cloudabi_fd.c		optional compat_cloudabi64
 compat/cloudabi/cloudabi_file.c		optional compat_cloudabi64
 compat/cloudabi/cloudabi_futex.c	optional compat_cloudabi64
 compat/cloudabi/cloudabi_mem.c		optional compat_cloudabi64
 compat/cloudabi/cloudabi_proc.c		optional compat_cloudabi64
 compat/cloudabi/cloudabi_random.c	optional compat_cloudabi64
 compat/cloudabi/cloudabi_sock.c		optional compat_cloudabi64
 compat/cloudabi/cloudabi_thread.c	optional compat_cloudabi64
 compat/cloudabi64/cloudabi64_fd.c	optional compat_cloudabi64
 compat/cloudabi64/cloudabi64_module.c	optional compat_cloudabi64
 compat/cloudabi64/cloudabi64_poll.c	optional compat_cloudabi64
 compat/cloudabi64/cloudabi64_sock.c	optional compat_cloudabi64
 compat/cloudabi64/cloudabi64_syscalls.c	optional compat_cloudabi64
 compat/cloudabi64/cloudabi64_sysent.c	optional compat_cloudabi64
 compat/cloudabi64/cloudabi64_thread.c	optional compat_cloudabi64
 compat/freebsd32/freebsd32_capability.c	optional compat_freebsd32
 compat/freebsd32/freebsd32_ioctl.c	optional compat_freebsd32
 compat/freebsd32/freebsd32_misc.c	optional compat_freebsd32
 compat/freebsd32/freebsd32_syscalls.c	optional compat_freebsd32
 compat/freebsd32/freebsd32_sysent.c	optional compat_freebsd32
 contrib/dev/acpica/common/ahids.c			optional acpi acpi_debug
 contrib/dev/acpica/common/ahuuids.c			optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbcmds.c		optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbconvert.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbdisply.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbexec.c		optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbfileio.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbhistry.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbinput.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbmethod.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbnames.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbobject.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbstats.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbtest.c		optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbutils.c	optional acpi acpi_debug
 contrib/dev/acpica/components/debugger/dbxface.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmbuffer.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmcstyle.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmdeferred.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmnames.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmopcode.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmresrc.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmresrcl.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmresrcl2.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmresrcs.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmutils.c	optional acpi acpi_debug
 contrib/dev/acpica/components/disassembler/dmwalk.c	optional acpi acpi_debug
 contrib/dev/acpica/components/dispatcher/dsargs.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dscontrol.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsdebug.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsfield.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsinit.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsmethod.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsmthdat.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsobject.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsopcode.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dsutils.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dswexec.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dswload.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dswload2.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dswscope.c	optional acpi
 contrib/dev/acpica/components/dispatcher/dswstate.c	optional acpi
 contrib/dev/acpica/components/events/evevent.c		optional acpi
 contrib/dev/acpica/components/events/evglock.c		optional acpi
 contrib/dev/acpica/components/events/evgpe.c		optional acpi
 contrib/dev/acpica/components/events/evgpeblk.c		optional acpi
 contrib/dev/acpica/components/events/evgpeinit.c	optional acpi
 contrib/dev/acpica/components/events/evgpeutil.c	optional acpi
 contrib/dev/acpica/components/events/evhandler.c	optional acpi
 contrib/dev/acpica/components/events/evmisc.c		optional acpi
 contrib/dev/acpica/components/events/evregion.c		optional acpi
 contrib/dev/acpica/components/events/evrgnini.c		optional acpi
 contrib/dev/acpica/components/events/evsci.c		optional acpi
 contrib/dev/acpica/components/events/evxface.c		optional acpi
 contrib/dev/acpica/components/events/evxfevnt.c		optional acpi
 contrib/dev/acpica/components/events/evxfgpe.c		optional acpi
 contrib/dev/acpica/components/events/evxfregn.c		optional acpi
 contrib/dev/acpica/components/executer/exconfig.c	optional acpi
 contrib/dev/acpica/components/executer/exconvrt.c	optional acpi
 contrib/dev/acpica/components/executer/excreate.c	optional acpi
 contrib/dev/acpica/components/executer/exdebug.c	optional acpi
 contrib/dev/acpica/components/executer/exdump.c		optional acpi
 contrib/dev/acpica/components/executer/exfield.c	optional acpi
 contrib/dev/acpica/components/executer/exfldio.c	optional acpi
 contrib/dev/acpica/components/executer/exmisc.c		optional acpi
 contrib/dev/acpica/components/executer/exmutex.c	optional acpi
 contrib/dev/acpica/components/executer/exnames.c	optional acpi
 contrib/dev/acpica/components/executer/exoparg1.c	optional acpi
 contrib/dev/acpica/components/executer/exoparg2.c	optional acpi
 contrib/dev/acpica/components/executer/exoparg3.c	optional acpi
 contrib/dev/acpica/components/executer/exoparg6.c	optional acpi
 contrib/dev/acpica/components/executer/exprep.c		optional acpi
 contrib/dev/acpica/components/executer/exregion.c	optional acpi
 contrib/dev/acpica/components/executer/exresnte.c	optional acpi
 contrib/dev/acpica/components/executer/exresolv.c	optional acpi
 contrib/dev/acpica/components/executer/exresop.c	optional acpi
 contrib/dev/acpica/components/executer/exstore.c	optional acpi
 contrib/dev/acpica/components/executer/exstoren.c	optional acpi
 contrib/dev/acpica/components/executer/exstorob.c	optional acpi
 contrib/dev/acpica/components/executer/exsystem.c	optional acpi
 contrib/dev/acpica/components/executer/exutils.c	optional acpi
 contrib/dev/acpica/components/hardware/hwacpi.c		optional acpi
 contrib/dev/acpica/components/hardware/hwesleep.c	optional acpi
 contrib/dev/acpica/components/hardware/hwgpe.c		optional acpi
 contrib/dev/acpica/components/hardware/hwpci.c		optional acpi
 contrib/dev/acpica/components/hardware/hwregs.c		optional acpi
 contrib/dev/acpica/components/hardware/hwsleep.c	optional acpi
 contrib/dev/acpica/components/hardware/hwtimer.c	optional acpi
 contrib/dev/acpica/components/hardware/hwvalid.c	optional acpi
 contrib/dev/acpica/components/hardware/hwxface.c	optional acpi
 contrib/dev/acpica/components/hardware/hwxfsleep.c	optional acpi
 contrib/dev/acpica/components/namespace/nsaccess.c	optional acpi
 contrib/dev/acpica/components/namespace/nsalloc.c	optional acpi
 contrib/dev/acpica/components/namespace/nsarguments.c	optional acpi
 contrib/dev/acpica/components/namespace/nsconvert.c	optional acpi
 contrib/dev/acpica/components/namespace/nsdump.c	optional acpi
 contrib/dev/acpica/components/namespace/nseval.c	optional acpi
 contrib/dev/acpica/components/namespace/nsinit.c	optional acpi
 contrib/dev/acpica/components/namespace/nsload.c	optional acpi
 contrib/dev/acpica/components/namespace/nsnames.c	optional acpi
 contrib/dev/acpica/components/namespace/nsobject.c	optional acpi
 contrib/dev/acpica/components/namespace/nsparse.c	optional acpi
 contrib/dev/acpica/components/namespace/nspredef.c	optional acpi
 contrib/dev/acpica/components/namespace/nsprepkg.c	optional acpi
 contrib/dev/acpica/components/namespace/nsrepair.c	optional acpi
 contrib/dev/acpica/components/namespace/nsrepair2.c	optional acpi
 contrib/dev/acpica/components/namespace/nssearch.c	optional acpi
 contrib/dev/acpica/components/namespace/nsutils.c	optional acpi
 contrib/dev/acpica/components/namespace/nswalk.c	optional acpi
 contrib/dev/acpica/components/namespace/nsxfeval.c	optional acpi
 contrib/dev/acpica/components/namespace/nsxfname.c	optional acpi
 contrib/dev/acpica/components/namespace/nsxfobj.c	optional acpi
 contrib/dev/acpica/components/parser/psargs.c		optional acpi
 contrib/dev/acpica/components/parser/psloop.c		optional acpi
 contrib/dev/acpica/components/parser/psobject.c		optional acpi
 contrib/dev/acpica/components/parser/psopcode.c		optional acpi
 contrib/dev/acpica/components/parser/psopinfo.c		optional acpi
 contrib/dev/acpica/components/parser/psparse.c		optional acpi
 contrib/dev/acpica/components/parser/psscope.c		optional acpi
 contrib/dev/acpica/components/parser/pstree.c		optional acpi
 contrib/dev/acpica/components/parser/psutils.c		optional acpi
 contrib/dev/acpica/components/parser/pswalk.c		optional acpi
 contrib/dev/acpica/components/parser/psxface.c		optional acpi
 contrib/dev/acpica/components/resources/rsaddr.c	optional acpi
 contrib/dev/acpica/components/resources/rscalc.c	optional acpi
 contrib/dev/acpica/components/resources/rscreate.c	optional acpi
 contrib/dev/acpica/components/resources/rsdump.c	optional acpi acpi_debug
 contrib/dev/acpica/components/resources/rsdumpinfo.c	optional acpi
 contrib/dev/acpica/components/resources/rsinfo.c	optional acpi
 contrib/dev/acpica/components/resources/rsio.c		optional acpi
 contrib/dev/acpica/components/resources/rsirq.c		optional acpi
 contrib/dev/acpica/components/resources/rslist.c	optional acpi
 contrib/dev/acpica/components/resources/rsmemory.c	optional acpi
 contrib/dev/acpica/components/resources/rsmisc.c	optional acpi
 contrib/dev/acpica/components/resources/rsserial.c	optional acpi
 contrib/dev/acpica/components/resources/rsutils.c	optional acpi
 contrib/dev/acpica/components/resources/rsxface.c	optional acpi
 contrib/dev/acpica/components/tables/tbdata.c		optional acpi
 contrib/dev/acpica/components/tables/tbfadt.c		optional acpi
 contrib/dev/acpica/components/tables/tbfind.c		optional acpi
 contrib/dev/acpica/components/tables/tbinstal.c		optional acpi
 contrib/dev/acpica/components/tables/tbprint.c		optional acpi
 contrib/dev/acpica/components/tables/tbutils.c		optional acpi
 contrib/dev/acpica/components/tables/tbxface.c		optional acpi
 contrib/dev/acpica/components/tables/tbxfload.c		optional acpi
 contrib/dev/acpica/components/tables/tbxfroot.c		optional acpi
 contrib/dev/acpica/components/utilities/utaddress.c	optional acpi
 contrib/dev/acpica/components/utilities/utalloc.c	optional acpi
 contrib/dev/acpica/components/utilities/utbuffer.c	optional acpi
 contrib/dev/acpica/components/utilities/utcache.c	optional acpi
 contrib/dev/acpica/components/utilities/utcopy.c	optional acpi
 contrib/dev/acpica/components/utilities/utdebug.c	optional acpi
 contrib/dev/acpica/components/utilities/utdecode.c	optional acpi
 contrib/dev/acpica/components/utilities/utdelete.c	optional acpi
 contrib/dev/acpica/components/utilities/uterror.c	optional acpi
 contrib/dev/acpica/components/utilities/uteval.c	optional acpi
 contrib/dev/acpica/components/utilities/utexcep.c	optional acpi
 contrib/dev/acpica/components/utilities/utglobal.c	optional acpi
 contrib/dev/acpica/components/utilities/uthex.c		optional acpi
 contrib/dev/acpica/components/utilities/utids.c		optional acpi
 contrib/dev/acpica/components/utilities/utinit.c	optional acpi
 contrib/dev/acpica/components/utilities/utlock.c	optional acpi
 contrib/dev/acpica/components/utilities/utmath.c	optional acpi
 contrib/dev/acpica/components/utilities/utmisc.c	optional acpi
 contrib/dev/acpica/components/utilities/utmutex.c	optional acpi
 contrib/dev/acpica/components/utilities/utnonansi.c	optional acpi
 contrib/dev/acpica/components/utilities/utobject.c	optional acpi
 contrib/dev/acpica/components/utilities/utosi.c		optional acpi
 contrib/dev/acpica/components/utilities/utownerid.c	optional acpi
 contrib/dev/acpica/components/utilities/utpredef.c	optional acpi
 contrib/dev/acpica/components/utilities/utresrc.c	optional acpi
 contrib/dev/acpica/components/utilities/utstate.c	optional acpi
 contrib/dev/acpica/components/utilities/utstring.c	optional acpi
 contrib/dev/acpica/components/utilities/utuuid.c	optional acpi acpi_debug
 contrib/dev/acpica/components/utilities/utxface.c	optional acpi
 contrib/dev/acpica/components/utilities/utxferror.c	optional acpi
 contrib/dev/acpica/components/utilities/utxfinit.c	optional acpi
 #contrib/dev/acpica/components/utilities/utxfmutex.c	optional acpi
 contrib/ipfilter/netinet/fil.c	optional ipfilter inet \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_auth.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_fil_freebsd.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_frag.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_log.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_nat.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_proxy.c optional ipfilter inet \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_state.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_lookup.c optional ipfilter inet \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN} -Wno-unused -Wno-error -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_pool.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_htable.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_sync.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/mlfk_ipl.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_nat6.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_rules.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_scan.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/ip_dstlist.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -Wno-unused -I$S/contrib/ipfilter"
 contrib/ipfilter/netinet/radix_ipf.c optional ipfilter inet \
 	compile-with "${NORMAL_C} -I$S/contrib/ipfilter"
 contrib/libfdt/fdt.c		optional fdt
 contrib/libfdt/fdt_ro.c		optional fdt
 contrib/libfdt/fdt_rw.c		optional fdt
 contrib/libfdt/fdt_strerror.c	optional fdt
 contrib/libfdt/fdt_sw.c		optional fdt
 contrib/libfdt/fdt_wip.c	optional fdt
 contrib/libnv/dnvlist.c		standard
 contrib/libnv/nvlist.c		standard
 contrib/libnv/nvpair.c		standard
 contrib/ngatm/netnatm/api/cc_conn.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C_NOWERROR} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/cc_data.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/cc_dump.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/cc_port.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/cc_sig.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/cc_user.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/api/unisap.c optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/misc/straddr.c optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/misc/unimsg_common.c optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/msg/traffic.c optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/msg/uni_ie.c optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/msg/uni_msg.c optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/saal/saal_sscfu.c	optional ngatm_sscfu \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/saal/saal_sscop.c	optional ngatm_sscop \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_call.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_coord.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_party.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_print.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_reset.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_uni.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_unimsgcpy.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 contrib/ngatm/netnatm/sig/sig_verify.c optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 crypto/blowfish/bf_ecb.c	optional ipsec
 crypto/blowfish/bf_skey.c	optional crypto | ipsec
 crypto/camellia/camellia.c	optional crypto | ipsec
 crypto/camellia/camellia-api.c	optional crypto | ipsec
 crypto/des/des_ecb.c		optional crypto | ipsec | netsmb
 crypto/des/des_setkey.c		optional crypto | ipsec | netsmb
 crypto/rc4/rc4.c		optional netgraph_mppc_encryption | kgssapi
 crypto/rijndael/rijndael-alg-fst.c optional crypto | geom_bde | \
 					 ipsec | random !random_loadable | wlan_ccmp
 crypto/rijndael/rijndael-api-fst.c optional geom_bde | random !random_loadable
 crypto/rijndael/rijndael-api.c	optional crypto | ipsec | wlan_ccmp
 crypto/sha1.c			optional carp | crypto | ipsec | \
 					 netgraph_mppc_encryption | sctp
 crypto/sha2/sha256c.c		optional crypto | geom_bde | ipsec | random !random_loadable | \
 					 sctp | zfs
 crypto/sha2/sha512c.c		optional crypto | geom_bde | ipsec | zfs
 crypto/siphash/siphash.c	optional inet | inet6
 crypto/siphash/siphash_test.c	optional inet | inet6
 ddb/db_access.c			optional ddb
 ddb/db_break.c			optional ddb
 ddb/db_capture.c		optional ddb
 ddb/db_command.c		optional ddb
 ddb/db_examine.c		optional ddb
 ddb/db_expr.c			optional ddb
 ddb/db_input.c			optional ddb
 ddb/db_lex.c			optional ddb
 ddb/db_main.c			optional ddb
 ddb/db_output.c			optional ddb
 ddb/db_print.c			optional ddb
 ddb/db_ps.c			optional ddb
 ddb/db_run.c			optional ddb
 ddb/db_script.c			optional ddb
 ddb/db_sym.c			optional ddb
 ddb/db_thread.c			optional ddb
 ddb/db_textdump.c		optional ddb
 ddb/db_variables.c		optional ddb
 ddb/db_watch.c			optional ddb
 ddb/db_write_cmd.c		optional ddb
 dev/aac/aac.c			optional aac
 dev/aac/aac_cam.c		optional aacp aac
 dev/aac/aac_debug.c		optional aac
 dev/aac/aac_disk.c		optional aac
 dev/aac/aac_linux.c		optional aac compat_linux
 dev/aac/aac_pci.c		optional aac pci
 dev/aacraid/aacraid.c		optional aacraid
 dev/aacraid/aacraid_cam.c	optional aacraid scbus
 dev/aacraid/aacraid_debug.c	optional aacraid
 dev/aacraid/aacraid_linux.c	optional aacraid compat_linux
 dev/aacraid/aacraid_pci.c	optional aacraid pci
 dev/acpi_support/acpi_wmi.c	optional acpi_wmi acpi
 dev/acpi_support/acpi_asus.c	optional acpi_asus acpi
 dev/acpi_support/acpi_asus_wmi.c	optional acpi_asus_wmi acpi
 dev/acpi_support/acpi_fujitsu.c	optional acpi_fujitsu acpi
 dev/acpi_support/acpi_hp.c	optional acpi_hp acpi
 dev/acpi_support/acpi_ibm.c	optional acpi_ibm acpi
 dev/acpi_support/acpi_panasonic.c optional acpi_panasonic acpi
 dev/acpi_support/acpi_sony.c	optional acpi_sony acpi
 dev/acpi_support/acpi_toshiba.c	optional acpi_toshiba acpi
 dev/acpi_support/atk0110.c	optional aibs acpi
 dev/acpica/Osd/OsdDebug.c	optional acpi
 dev/acpica/Osd/OsdHardware.c	optional acpi
 dev/acpica/Osd/OsdInterrupt.c	optional acpi
 dev/acpica/Osd/OsdMemory.c	optional acpi
 dev/acpica/Osd/OsdSchedule.c	optional acpi
 dev/acpica/Osd/OsdStream.c	optional acpi
 dev/acpica/Osd/OsdSynch.c	optional acpi
 dev/acpica/Osd/OsdTable.c	optional acpi
 dev/acpica/acpi.c		optional acpi
 dev/acpica/acpi_acad.c		optional acpi
 dev/acpica/acpi_battery.c	optional acpi
 dev/acpica/acpi_button.c	optional acpi
 dev/acpica/acpi_cmbat.c		optional acpi
 dev/acpica/acpi_cpu.c		optional acpi
 dev/acpica/acpi_ec.c		optional acpi
 dev/acpica/acpi_isab.c		optional acpi isa
 dev/acpica/acpi_lid.c		optional acpi
 dev/acpica/acpi_package.c	optional acpi
 dev/acpica/acpi_pci.c		optional acpi pci
 dev/acpica/acpi_pci_link.c	optional acpi pci
 dev/acpica/acpi_pcib.c		optional acpi pci
 dev/acpica/acpi_pcib_acpi.c	optional acpi pci
 dev/acpica/acpi_pcib_pci.c	optional acpi pci
 dev/acpica/acpi_perf.c		optional acpi
 dev/acpica/acpi_powerres.c	optional acpi
 dev/acpica/acpi_quirk.c		optional acpi
 dev/acpica/acpi_resource.c	optional acpi
 dev/acpica/acpi_smbat.c		optional acpi
 dev/acpica/acpi_thermal.c	optional acpi
 dev/acpica/acpi_throttle.c	optional acpi
 dev/acpica/acpi_timer.c		optional acpi
 dev/acpica/acpi_video.c		optional acpi_video acpi
 dev/acpica/acpi_dock.c		optional acpi_dock acpi
 dev/adlink/adlink.c		optional adlink
 dev/advansys/adv_eisa.c		optional adv eisa
 dev/advansys/adv_pci.c		optional adv pci
 dev/advansys/advansys.c		optional adv
 dev/advansys/advlib.c		optional adv
 dev/advansys/advmcode.c		optional adv
 dev/advansys/adw_pci.c		optional adw pci
 dev/advansys/adwcam.c		optional adw
 dev/advansys/adwlib.c		optional adw
 dev/advansys/adwmcode.c		optional adw
 dev/ae/if_ae.c			optional ae pci
 dev/age/if_age.c		optional age pci
 dev/agp/agp.c			optional agp pci
 dev/agp/agp_if.m		optional agp pci
 dev/aha/aha.c			optional aha
 dev/aha/aha_isa.c		optional aha isa
 dev/aha/aha_mca.c		optional aha mca
 dev/ahb/ahb.c			optional ahb eisa
 dev/ahci/ahci.c			optional ahci
 dev/ahci/ahciem.c		optional ahci
 dev/ahci/ahci_pci.c		optional ahci pci
 dev/aic/aic.c			optional aic
 dev/aic/aic_pccard.c		optional aic pccard
 dev/aic7xxx/ahc_eisa.c		optional ahc eisa
 dev/aic7xxx/ahc_isa.c		optional ahc isa
 dev/aic7xxx/ahc_pci.c		optional ahc pci \
 	compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}"
 dev/aic7xxx/ahd_pci.c		optional ahd pci \
 	compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}"
 dev/aic7xxx/aic7770.c		optional ahc
 dev/aic7xxx/aic79xx.c		optional ahd pci
 dev/aic7xxx/aic79xx_osm.c	optional ahd pci
 dev/aic7xxx/aic79xx_pci.c	optional ahd pci
 dev/aic7xxx/aic79xx_reg_print.c	optional ahd pci ahd_reg_pretty_print
 dev/aic7xxx/aic7xxx.c		optional ahc
 dev/aic7xxx/aic7xxx_93cx6.c	optional ahc
 dev/aic7xxx/aic7xxx_osm.c	optional ahc
 dev/aic7xxx/aic7xxx_pci.c	optional ahc pci
 dev/aic7xxx/aic7xxx_reg_print.c	optional ahc ahc_reg_pretty_print
 dev/alc/if_alc.c		optional alc pci
 dev/ale/if_ale.c		optional ale pci
 dev/alpm/alpm.c			optional alpm pci
 dev/altera/avgen/altera_avgen.c		optional altera_avgen
 dev/altera/avgen/altera_avgen_fdt.c	optional altera_avgen fdt
 dev/altera/avgen/altera_avgen_nexus.c	optional altera_avgen
 dev/altera/sdcard/altera_sdcard.c	optional altera_sdcard
 dev/altera/sdcard/altera_sdcard_disk.c	optional altera_sdcard
 dev/altera/sdcard/altera_sdcard_io.c	optional altera_sdcard
 dev/altera/sdcard/altera_sdcard_fdt.c	optional altera_sdcard fdt
 dev/altera/sdcard/altera_sdcard_nexus.c	optional altera_sdcard
 dev/altera/pio/pio.c		optional altera_pio
 dev/altera/pio/pio_if.m		optional altera_pio
 dev/amdpm/amdpm.c		optional amdpm pci | nfpm pci
 dev/amdsmb/amdsmb.c		optional amdsmb pci
 dev/amr/amr.c			optional amr
 dev/amr/amr_cam.c		optional amrp amr
 dev/amr/amr_disk.c		optional amr
 dev/amr/amr_linux.c		optional amr compat_linux
 dev/amr/amr_pci.c		optional amr pci
 dev/an/if_an.c			optional an
 dev/an/if_an_isa.c		optional an isa
 dev/an/if_an_pccard.c		optional an pccard
 dev/an/if_an_pci.c		optional an pci
 #
 dev/ata/ata_if.m		optional ata | atacore
 dev/ata/ata-all.c		optional ata | atacore
 dev/ata/ata-dma.c		optional ata | atacore
 dev/ata/ata-lowlevel.c		optional ata | atacore
 dev/ata/ata-sata.c		optional ata | atacore
 dev/ata/ata-card.c		optional ata pccard | atapccard
 dev/ata/ata-cbus.c		optional ata pc98 | atapc98
 dev/ata/ata-isa.c		optional ata isa | ataisa
 dev/ata/ata-pci.c		optional ata pci | atapci
 dev/ata/chipsets/ata-acard.c	optional ata pci | ataacard
 dev/ata/chipsets/ata-acerlabs.c	optional ata pci | ataacerlabs
 dev/ata/chipsets/ata-amd.c	optional ata pci | ataamd
 dev/ata/chipsets/ata-ati.c	optional ata pci | ataati
 dev/ata/chipsets/ata-cenatek.c	optional ata pci | atacenatek
 dev/ata/chipsets/ata-cypress.c	optional ata pci | atacypress
 dev/ata/chipsets/ata-cyrix.c	optional ata pci | atacyrix
 dev/ata/chipsets/ata-highpoint.c	optional ata pci | atahighpoint
 dev/ata/chipsets/ata-intel.c	optional ata pci | ataintel
 dev/ata/chipsets/ata-ite.c	optional ata pci | ataite
 dev/ata/chipsets/ata-jmicron.c	optional ata pci | atajmicron
 dev/ata/chipsets/ata-marvell.c	optional ata pci | atamarvell
 dev/ata/chipsets/ata-micron.c	optional ata pci | atamicron
 dev/ata/chipsets/ata-national.c	optional ata pci | atanational
 dev/ata/chipsets/ata-netcell.c	optional ata pci | atanetcell
 dev/ata/chipsets/ata-nvidia.c	optional ata pci | atanvidia
 dev/ata/chipsets/ata-promise.c	optional ata pci | atapromise
 dev/ata/chipsets/ata-serverworks.c	optional ata pci | ataserverworks
 dev/ata/chipsets/ata-siliconimage.c	optional ata pci | atasiliconimage | ataati
 dev/ata/chipsets/ata-sis.c	optional ata pci | atasis
 dev/ata/chipsets/ata-via.c	optional ata pci | atavia
 #
 dev/ath/if_ath_pci.c		optional ath_pci pci \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 #
 dev/ath/if_ath_ahb.c		optional ath_ahb \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 #
 dev/ath/if_ath.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_alq.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_beacon.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_btcoex.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_debug.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_descdma.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_keycache.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_ioctl.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_led.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_lna_div.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_tx.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_tx_edma.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_tx_ht.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_tdma.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_sysctl.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_rx.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_rx_edma.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/if_ath_spectral.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ah_osdep.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 #
 dev/ath/ath_hal/ah.c		optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_eeprom_v1.c	optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_eeprom_v3.c	optional ath_hal | ath_ar5211 | ath_ar5212 \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_eeprom_v14.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_eeprom_v4k.c \
 	optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_eeprom_9287.c \
 	optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_hal/ah_regdomain.c	optional ath \
 	compile-with "${NORMAL_C} ${NO_WSHIFT_COUNT_NEGATIVE} ${NO_WSHIFT_COUNT_OVERFLOW} -I$S/dev/ath"
 # ar5210
 dev/ath/ath_hal/ar5210/ar5210_attach.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_beacon.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_interrupts.c	optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_keycache.c	optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_misc.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_phy.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_power.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_recv.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_reset.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5210/ar5210_xmit.c		optional ath_hal | ath_ar5210 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar5211
 dev/ath/ath_hal/ar5211/ar5211_attach.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_beacon.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_interrupts.c	optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_keycache.c	optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_misc.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_phy.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_power.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_recv.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_reset.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5211/ar5211_xmit.c		optional ath_hal | ath_ar5211 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar5212
 dev/ath/ath_hal/ar5212/ar5212_ani.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_attach.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_beacon.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_eeprom.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_gpio.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_interrupts.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_keycache.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_misc.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_phy.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_power.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_recv.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_reset.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_rfgain.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5212_xmit.c \
 	optional ath_hal | ath_ar5212 | ath_ar5416 | ath_ar9160 | ath_ar9280 | \
 	ath_ar9285 | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar5416 (depends on ar5212)
 dev/ath/ath_hal/ar5416/ar5416_ani.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_attach.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_beacon.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_btcoex.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_cal.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_cal_iq.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_cal_adcgain.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_cal_adcdc.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_eeprom.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_gpio.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_interrupts.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_keycache.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_misc.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_phy.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_power.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_radar.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_recv.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_reset.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_spectral.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar5416_xmit.c \
 	optional ath_hal | ath_ar5416 | ath_ar9160 | ath_ar9280 | ath_ar9285 | \
 	ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar9130 (depends upon ar5416) - also requires AH_SUPPORT_AR9130
 #
 # Since this is an embedded MAC SoC, there's no need to compile it into the
 # default HAL.
 dev/ath/ath_hal/ar9001/ar9130_attach.c optional ath_ar9130 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9001/ar9130_phy.c optional ath_ar9130 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9001/ar9130_eeprom.c optional ath_ar9130 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar9160 (depends on ar5416)
 dev/ath/ath_hal/ar9001/ar9160_attach.c optional ath_hal | ath_ar9160 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar9280 (depends on ar5416)
 dev/ath/ath_hal/ar9002/ar9280_attach.c optional ath_hal | ath_ar9280 | \
 	ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9280_olc.c optional ath_hal | ath_ar9280 | \
 	ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar9285 (depends on ar5416 and ar9280)
 dev/ath/ath_hal/ar9002/ar9285_attach.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285_btcoex.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285_reset.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285_cal.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285_phy.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285_diversity.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 # ar9287 (depends on ar5416)
 dev/ath/ath_hal/ar9002/ar9287_attach.c optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9287_reset.c optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9287_cal.c optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9287_olc.c optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 
 # ar9300
 contrib/dev/ath/ath_hal/ar9300/ar9300_ani.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_attach.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_beacon.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_eeprom.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WCONSTANT_CONVERSION}"
 contrib/dev/ath/ath_hal/ar9300/ar9300_freebsd.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_gpio.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_interrupts.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_keycache.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_mci.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_misc.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_paprd.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_phy.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_power.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_radar.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_radio.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_recv.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_recv_ds.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_reset.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal ${NO_WSOMETIMES_UNINITIALIZED} -Wno-unused-function"
 contrib/dev/ath/ath_hal/ar9300/ar9300_stub.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_stub_funcs.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_spectral.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_timer.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_xmit.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 contrib/dev/ath/ath_hal/ar9300/ar9300_xmit_ds.c optional ath_hal | ath_ar9300 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal -I$S/contrib/dev/ath/ath_hal"
 
 # ath rf backends.
 # Every entry is compiled with the dev/ath and dev/ath/ath_hal include
 # paths.  ar2316.c and ar2317.c are listed only under their own ath_rf*
 # option; every other backend is additionally listed under ath_hal.
 dev/ath/ath_hal/ar5212/ar2316.c	optional ath_rf2316 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar2317.c	optional ath_rf2317 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar2413.c	optional ath_hal | ath_rf2413 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar2425.c	optional ath_hal | ath_rf2425 | ath_rf2417 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5111.c	optional ath_hal | ath_rf5111 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5112.c	optional ath_hal | ath_rf5112 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5212/ar5413.c	optional ath_hal | ath_rf5413 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar5416/ar2133.c optional ath_hal | ath_ar5416 | \
 	ath_ar9130 | ath_ar9160 | ath_ar9280 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9280.c optional ath_hal | ath_ar9280 | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9285.c optional ath_hal | ath_ar9285 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 dev/ath/ath_hal/ar9002/ar9287.c optional ath_hal | ath_ar9287 \
 	compile-with "${NORMAL_C} -I$S/dev/ath -I$S/dev/ath/ath_hal"
 
 # ath rate control algorithms: each one is listed under its own
 # ath_rate_* option (amrr, onoe, sample) and built with -I$S/dev/ath.
 dev/ath/ath_rate/amrr/amrr.c	optional ath_rate_amrr \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_rate/onoe/onoe.c	optional ath_rate_onoe \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 dev/ath/ath_rate/sample/sample.c	optional ath_rate_sample \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 # ath DFS modules: dfs_null.c is listed under the base ath option itself.
 dev/ath/ath_dfs/null/dfs_null.c	optional ath \
 	compile-with "${NORMAL_C} -I$S/dev/ath"
 #
 dev/bce/if_bce.c		optional bce
 dev/bfe/if_bfe.c		optional bfe
 dev/bge/if_bge.c		optional bge
 dev/bktr/bktr_audio.c		optional bktr pci
 dev/bktr/bktr_card.c		optional bktr pci
 dev/bktr/bktr_core.c		optional bktr pci
 dev/bktr/bktr_i2c.c		optional bktr pci smbus
 dev/bktr/bktr_os.c		optional bktr pci
 dev/bktr/bktr_tuner.c		optional bktr pci
 dev/bktr/msp34xx.c		optional bktr pci
 dev/buslogic/bt.c		optional bt
 dev/buslogic/bt_eisa.c		optional bt eisa
 dev/buslogic/bt_isa.c		optional bt isa
 dev/buslogic/bt_mca.c		optional bt mca
 dev/buslogic/bt_pci.c		optional bt pci
 dev/bwi/bwimac.c		optional bwi
 dev/bwi/bwiphy.c		optional bwi
 dev/bwi/bwirf.c			optional bwi
 dev/bwi/if_bwi.c		optional bwi
 dev/bwi/if_bwi_pci.c		optional bwi pci
 # XXX Work around clang warning, until maintainer approves fix.
 # The workaround is the extra ${NO_WSOMETIMES_UNINITIALIZED} appended to
 # the normal C compile command.  NOTE(review): the variable is defined
 # outside this file; presumably it turns off clang's
 # -Wsometimes-uninitialized -- confirm before removing.
 dev/bwn/if_bwn.c		optional bwn siba_bwn \
 	compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}"
 dev/cardbus/cardbus.c		optional cardbus
 dev/cardbus/cardbus_cis.c	optional cardbus
 dev/cardbus/cardbus_device.c	optional cardbus
 dev/cas/if_cas.c		optional cas
 dev/cfi/cfi_bus_fdt.c		optional cfi fdt
 dev/cfi/cfi_bus_nexus.c		optional cfi
 dev/cfi/cfi_core.c		optional cfi
 dev/cfi/cfi_dev.c		optional cfi
 dev/cfi/cfi_disk.c		optional cfid
 dev/ciss/ciss.c			optional ciss
 dev/cm/smc90cx6.c		optional cm
 dev/cmx/cmx.c			optional cmx
 dev/cmx/cmx_pccard.c		optional cmx pccard
 dev/cpufreq/ichss.c		optional cpufreq
 dev/cs/if_cs.c			optional cs
 dev/cs/if_cs_isa.c		optional cs isa
 dev/cs/if_cs_pccard.c		optional cs pccard
 dev/cxgb/cxgb_main.c		optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/cxgb_sge.c		optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_mc5.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_vsc7323.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_vsc8211.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_ael1002.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_aq100x.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_mv88e1xxx.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_xgmac.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_t3_hw.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/common/cxgb_tn1010.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/sys/uipc_mvec.c	optional cxgb pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgb/cxgb_t3fw.c		optional cxgb cxgb_t3fw \
 	compile-with "${NORMAL_C} -I$S/dev/cxgb"
 dev/cxgbe/t4_mp_ring.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/t4_main.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/t4_netmap.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/t4_sge.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/t4_l2t.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/t4_tracer.c		optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 dev/cxgbe/common/t4_hw.c	optional cxgbe pci \
 	compile-with "${NORMAL_C} -I$S/dev/cxgbe"
 # cxgbe t4 firmware entries.  Three kinds of records cooperate:
 #   t4fw_cfg.c - generated by $S/tools/fw_stub.awk, which is handed the
 #                three .fw:name pairs below (t4fw_cfg, t4fw_cfg_uwire,
 #                t4fw);
 #   *.fwo      - built from the matching .fw with ${NORMAL_FWO};
 #   *.fw       - built from a file under $S/dev/cxgbe/firmware: the two
 #                config .txt files are copied with ${CP}, the .bin.uu
 #                image goes through ${NORMAL_FW}.
 # Each record lists its own output in its "clean" clause.
 t4fw_cfg.c		optional cxgbe					\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk t4fw_cfg.fw:t4fw_cfg t4fw_cfg_uwire.fw:t4fw_cfg_uwire t4fw.fw:t4fw -mt4fw_cfg -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"t4fw_cfg.c"
 t4fw_cfg.fwo		optional cxgbe					\
 	dependency	"t4fw_cfg.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"t4fw_cfg.fwo"
 t4fw_cfg.fw		optional cxgbe					\
 	dependency	"$S/dev/cxgbe/firmware/t4fw_cfg.txt"		\
 	compile-with	"${CP} ${.ALLSRC} ${.TARGET}"			\
 	no-obj no-implicit-rule						\
 	clean		"t4fw_cfg.fw"
 t4fw_cfg_uwire.fwo	optional cxgbe					\
 	dependency	"t4fw_cfg_uwire.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"t4fw_cfg_uwire.fwo"
 t4fw_cfg_uwire.fw	optional cxgbe					\
 	dependency	"$S/dev/cxgbe/firmware/t4fw_cfg_uwire.txt"	\
 	compile-with	"${CP} ${.ALLSRC} ${.TARGET}"			\
 	no-obj no-implicit-rule						\
 	clean		"t4fw_cfg_uwire.fw"
 t4fw.fwo		optional cxgbe					\
 	dependency	"t4fw.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"t4fw.fwo"
 t4fw.fw			optional cxgbe					\
 	dependency	"$S/dev/cxgbe/firmware/t4fw-1.14.4.0.bin.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"t4fw.fw"
 # cxgbe t5 firmware entries.  t5fw_cfg.c is generated by
 # $S/tools/fw_stub.awk from two .fw:name pairs (t5fw_cfg, t5fw).  Each
 # .fwo is built from the matching .fw with ${NORMAL_FWO}; t5fw_cfg.fw is
 # a ${CP} copy of the config .txt file and t5fw.fw is produced from the
 # .bin.uu image via ${NORMAL_FW}.
 t5fw_cfg.c		optional cxgbe					\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk t5fw_cfg.fw:t5fw_cfg t5fw.fw:t5fw -mt5fw_cfg -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"t5fw_cfg.c"
 t5fw_cfg.fwo		optional cxgbe					\
 	dependency	"t5fw_cfg.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"t5fw_cfg.fwo"
 t5fw_cfg.fw		optional cxgbe					\
 	dependency	"$S/dev/cxgbe/firmware/t5fw_cfg.txt"		\
 	compile-with	"${CP} ${.ALLSRC} ${.TARGET}"			\
 	no-obj no-implicit-rule						\
 	clean		"t5fw_cfg.fw"
 t5fw.fwo		optional cxgbe					\
 	dependency	"t5fw.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"t5fw.fwo"
 t5fw.fw			optional cxgbe					\
 	dependency	"$S/dev/cxgbe/firmware/t5fw-1.14.4.0.bin.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"t5fw.fw"
 dev/cy/cy.c			optional cy
 dev/cy/cy_isa.c			optional cy isa
 dev/cy/cy_pci.c			optional cy pci
 dev/cyapa/cyapa.c		optional cyapa smbus
 dev/dc/if_dc.c			optional dc pci
 dev/dc/dcphy.c			optional dc pci
 dev/dc/pnphy.c			optional dc pci
 dev/dcons/dcons.c		optional dcons
 dev/dcons/dcons_crom.c		optional dcons_crom
 dev/dcons/dcons_os.c		optional dcons
 dev/de/if_de.c			optional de pci
 dev/digi/CX.c			optional digi_CX
 dev/digi/CX_PCI.c		optional digi_CX_PCI
 dev/digi/EPCX.c			optional digi_EPCX
 dev/digi/EPCX_PCI.c		optional digi_EPCX_PCI
 dev/digi/Xe.c			optional digi_Xe
 dev/digi/Xem.c			optional digi_Xem
 dev/digi/Xr.c			optional digi_Xr
 dev/digi/digi.c			optional digi
 dev/digi/digi_isa.c		optional digi isa
 dev/digi/digi_pci.c		optional digi pci
 dev/dpt/dpt_eisa.c		optional dpt eisa
 dev/dpt/dpt_pci.c		optional dpt pci
 dev/dpt/dpt_scsi.c		optional dpt
 dev/drm/ati_pcigart.c		optional drm
 dev/drm/drm_agpsupport.c	optional drm
 dev/drm/drm_auth.c		optional drm
 dev/drm/drm_bufs.c		optional drm
 dev/drm/drm_context.c		optional drm
 dev/drm/drm_dma.c		optional drm
 dev/drm/drm_drawable.c		optional drm
 dev/drm/drm_drv.c		optional drm
 dev/drm/drm_fops.c		optional drm
 dev/drm/drm_hashtab.c		optional drm
 dev/drm/drm_ioctl.c		optional drm
 dev/drm/drm_irq.c		optional drm
 dev/drm/drm_lock.c		optional drm
 dev/drm/drm_memory.c		optional drm
 dev/drm/drm_mm.c		optional drm
 dev/drm/drm_pci.c		optional drm
 dev/drm/drm_scatter.c		optional drm
 dev/drm/drm_sman.c		optional drm
 dev/drm/drm_sysctl.c		optional drm
 dev/drm/drm_vm.c		optional drm
 dev/drm/i915_dma.c		optional i915drm
 dev/drm/i915_drv.c		optional i915drm
 dev/drm/i915_irq.c		optional i915drm
 dev/drm/i915_mem.c		optional i915drm
 dev/drm/i915_suspend.c		optional i915drm
 dev/drm/mach64_dma.c		optional mach64drm
 dev/drm/mach64_drv.c		optional mach64drm
 dev/drm/mach64_irq.c		optional mach64drm
 dev/drm/mach64_state.c		optional mach64drm
 dev/drm/mga_dma.c		optional mgadrm
 dev/drm/mga_drv.c		optional mgadrm
 dev/drm/mga_irq.c		optional mgadrm
 dev/drm/mga_state.c		optional mgadrm
 dev/drm/mga_warp.c		optional mgadrm
 dev/drm/r128_cce.c		optional r128drm \
 	compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}"
 dev/drm/r128_drv.c		optional r128drm
 dev/drm/r128_irq.c		optional r128drm
 dev/drm/r128_state.c		optional r128drm
 dev/drm/r300_cmdbuf.c		optional radeondrm
 dev/drm/r600_blit.c		optional radeondrm
 dev/drm/r600_cp.c		optional radeondrm \
 	compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}"
 dev/drm/radeon_cp.c		optional radeondrm \
 	compile-with "${NORMAL_C} ${NO_WCONSTANT_CONVERSION}"
 dev/drm/radeon_cs.c		optional radeondrm
 dev/drm/radeon_drv.c		optional radeondrm
 dev/drm/radeon_irq.c		optional radeondrm
 dev/drm/radeon_mem.c		optional radeondrm
 dev/drm/radeon_state.c		optional radeondrm
 dev/drm/savage_bci.c		optional savagedrm
 dev/drm/savage_drv.c		optional savagedrm
 dev/drm/savage_state.c		optional savagedrm
 dev/drm/sis_drv.c		optional sisdrm
 dev/drm/sis_ds.c		optional sisdrm
 dev/drm/sis_mm.c		optional sisdrm
 dev/drm/tdfx_drv.c		optional tdfxdrm
 dev/drm/via_dma.c		optional viadrm
 dev/drm/via_dmablit.c		optional viadrm
 dev/drm/via_drv.c		optional viadrm
 dev/drm/via_irq.c		optional viadrm
 dev/drm/via_map.c		optional viadrm
 dev/drm/via_mm.c		optional viadrm
 dev/drm/via_verifier.c		optional viadrm
 dev/drm/via_video.c		optional viadrm
 dev/ed/if_ed.c			optional ed
 dev/ed/if_ed_novell.c		optional ed
 dev/ed/if_ed_rtl80x9.c		optional ed
 dev/ed/if_ed_pccard.c		optional ed pccard
 dev/ed/if_ed_pci.c		optional ed pci
 dev/eisa/eisa_if.m		standard
 dev/eisa/eisaconf.c		optional eisa
 # e1000 family: if_em.c and if_lem.c are listed under em, if_igb.c under
 # igb, and the e1000_* files under "em | igb".  Every entry adds
 # -I$S/dev/e1000 to the normal C compile command.
 dev/e1000/if_em.c		optional em \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/if_lem.c		optional em \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/if_igb.c		optional igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_80003es2lan.c	optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82540.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82541.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82542.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82543.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82571.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_82575.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_ich8lan.c	optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_i210.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_api.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_mac.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_manage.c	optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_nvm.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_phy.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_vf.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_mbx.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/e1000/e1000_osdep.c		optional em | igb \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/et/if_et.c			optional et
 dev/en/if_en_pci.c		optional en pci
 dev/en/midway.c			optional en
 dev/ep/if_ep.c			optional ep
 dev/ep/if_ep_eisa.c		optional ep eisa
 dev/ep/if_ep_isa.c		optional ep isa
 dev/ep/if_ep_mca.c		optional ep mca
 dev/ep/if_ep_pccard.c		optional ep pccard
 dev/esp/esp_pci.c		optional esp pci
 dev/esp/ncr53c9x.c		optional esp
 dev/etherswitch/arswitch/arswitch.c		optional arswitch
 dev/etherswitch/arswitch/arswitch_reg.c		optional arswitch
 dev/etherswitch/arswitch/arswitch_phy.c		optional arswitch
 dev/etherswitch/arswitch/arswitch_8216.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_8226.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_8316.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_8327.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_7240.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_9340.c	optional arswitch
 dev/etherswitch/arswitch/arswitch_vlans.c	optional arswitch
 dev/etherswitch/etherswitch.c		optional etherswitch
 dev/etherswitch/etherswitch_if.m	optional etherswitch
 dev/etherswitch/ip17x/ip17x.c		optional ip17x
 dev/etherswitch/ip17x/ip175c.c		optional ip17x
 dev/etherswitch/ip17x/ip175d.c		optional ip17x
 dev/etherswitch/ip17x/ip17x_phy.c	optional ip17x
 dev/etherswitch/ip17x/ip17x_vlans.c	optional ip17x
 dev/etherswitch/miiproxy.c		optional miiproxy
 dev/etherswitch/rtl8366/rtl8366rb.c	optional rtl8366rb
 dev/etherswitch/ukswitch/ukswitch.c	optional ukswitch
 dev/ex/if_ex.c			optional ex
 dev/ex/if_ex_isa.c		optional ex isa
 dev/ex/if_ex_pccard.c		optional ex pccard
 dev/exca/exca.c			optional cbb
 dev/fatm/if_fatm.c		optional fatm pci
 dev/fb/fbd.c			optional fbd | vt
 dev/fb/fb_if.m			standard
 dev/fb/splash.c			optional sc splash
 dev/fdt/fdt_clock.c		optional fdt fdt_clock
 dev/fdt/fdt_clock_if.m		optional fdt fdt_clock
 dev/fdt/fdt_common.c		optional fdt
 dev/fdt/fdt_pinctrl.c		optional fdt fdt_pinctrl
 dev/fdt/fdt_pinctrl_if.m	optional fdt fdt_pinctrl
 dev/fdt/fdt_slicer.c		optional fdt cfi | fdt nand
 dev/fdt/fdt_static_dtb.S	optional fdt fdt_dtb_static \
 	dependency	"$S/boot/fdt/dts/${MACHINE}/${FDT_DTS_FILE}"
 dev/fdt/simplebus.c		optional fdt
 dev/fe/if_fe.c			optional fe
 dev/fe/if_fe_pccard.c		optional fe pccard
 dev/filemon/filemon.c		optional filemon
 dev/firewire/firewire.c		optional firewire
 dev/firewire/fwcrom.c		optional firewire
 dev/firewire/fwdev.c		optional firewire
 dev/firewire/fwdma.c		optional firewire
 dev/firewire/fwmem.c		optional firewire
 dev/firewire/fwohci.c		optional firewire
 dev/firewire/fwohci_pci.c	optional firewire pci
 dev/firewire/if_fwe.c		optional fwe
 dev/firewire/if_fwip.c		optional fwip
 dev/firewire/sbp.c		optional sbp
 dev/firewire/sbp_targ.c		optional sbp_targ
 dev/flash/at45d.c		optional at45d
 dev/flash/mx25l.c		optional mx25l
 dev/fxp/if_fxp.c		optional fxp
 dev/fxp/inphy.c			optional fxp
 dev/gem/if_gem.c		optional gem
 dev/gem/if_gem_pci.c		optional gem pci
 dev/gem/if_gem_sbus.c		optional gem sbus
 dev/gpio/gpiobacklight.c	optional gpiobacklight fdt
 dev/gpio/gpiobus.c		optional gpio				\
 	dependency	"gpiobus_if.h"
 dev/gpio/gpioc.c		optional gpio				\
 	dependency	"gpio_if.h"
 dev/gpio/gpioiic.c		optional gpioiic
 dev/gpio/gpioled.c		optional gpioled
 dev/gpio/gpio_if.m		optional gpio
 dev/gpio/gpiobus_if.m		optional gpio
 dev/gpio/ofw_gpiobus.c		optional fdt gpio
 dev/hatm/if_hatm.c		optional hatm pci
 dev/hatm/if_hatm_intr.c		optional hatm pci
 dev/hatm/if_hatm_ioctl.c	optional hatm pci
 dev/hatm/if_hatm_rx.c		optional hatm pci
 dev/hatm/if_hatm_tx.c		optional hatm pci
 dev/hifn/hifn7751.c		optional hifn
 dev/hme/if_hme.c		optional hme
 dev/hme/if_hme_pci.c		optional hme pci
 dev/hme/if_hme_sbus.c		optional hme sbus
 dev/hptiop/hptiop.c		optional hptiop scbus
 dev/hwpmc/hwpmc_logging.c	optional hwpmc
 dev/hwpmc/hwpmc_mod.c		optional hwpmc
 dev/hwpmc/hwpmc_soft.c		optional hwpmc
 dev/ichiic/ig4_iic.c		optional ig4 smbus
 dev/ichiic/ig4_pci.c		optional ig4 pci smbus
 dev/ichsmb/ichsmb.c		optional ichsmb
 dev/ichsmb/ichsmb_pci.c		optional ichsmb pci
 dev/ida/ida.c			optional ida
 dev/ida/ida_disk.c		optional ida
 dev/ida/ida_eisa.c		optional ida eisa
 dev/ida/ida_pci.c		optional ida pci
 dev/ie/if_ie.c			optional ie isa nowerror
 dev/ie/if_ie_isa.c		optional ie isa
 dev/iicbus/ad7418.c		optional ad7418
 dev/iicbus/ds1307.c		optional ds1307
 dev/iicbus/ds133x.c		optional ds133x
 dev/iicbus/ds1374.c		optional ds1374
 dev/iicbus/ds1672.c		optional ds1672
 dev/iicbus/ds3231.c		optional ds3231
 dev/iicbus/icee.c		optional icee
 dev/iicbus/if_ic.c		optional ic
 dev/iicbus/iic.c		optional iic
 dev/iicbus/iicbb.c		optional iicbb
 dev/iicbus/iicbb_if.m		optional iicbb
 dev/iicbus/iicbus.c		optional iicbus
 dev/iicbus/iicbus_if.m		optional iicbus
 dev/iicbus/iiconf.c		optional iicbus
 dev/iicbus/iicsmb.c		optional iicsmb				\
 	dependency	"iicbus_if.h"
 dev/iicbus/iicoc.c		optional iicoc
 dev/iicbus/lm75.c		optional lm75
 dev/iicbus/pcf8563.c		optional pcf8563
 dev/iicbus/s35390a.c		optional s35390a
 dev/iir/iir.c			optional iir
 dev/iir/iir_ctrl.c		optional iir
 dev/iir/iir_pci.c		optional iir pci
 dev/intpm/intpm.c		optional intpm pci
 # XXX Work around clang warning, until maintainer approves fix.
 # Only ips.c carries the extra ${NO_WSOMETIMES_UNINITIALIZED}; the other
 # ips files use the default rule.  NOTE(review): the variable is defined
 # outside this file; presumably it turns off clang's
 # -Wsometimes-uninitialized -- confirm before removing.
 dev/ips/ips.c			optional ips \
 	compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}"
 dev/ips/ips_commands.c		optional ips
 dev/ips/ips_disk.c		optional ips
 dev/ips/ips_ioctl.c		optional ips
 dev/ips/ips_pci.c		optional ips pci
 dev/ipw/if_ipw.c		optional ipw
 ipwbssfw.c			optional ipwbssfw | ipwfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk ipw_bss.fw:ipw_bss:130 -lintel_ipw -mipw_bss -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"ipwbssfw.c"
 ipw_bss.fwo			optional ipwbssfw | ipwfw		\
 	dependency	"ipw_bss.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"ipw_bss.fwo"
 ipw_bss.fw			optional ipwbssfw | ipwfw		\
 	dependency	"$S/contrib/dev/ipw/ipw2100-1.3.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"ipw_bss.fw"
 ipwibssfw.c			optional ipwibssfw | ipwfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk ipw_ibss.fw:ipw_ibss:130 -lintel_ipw -mipw_ibss -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"ipwibssfw.c"
 ipw_ibss.fwo			optional ipwibssfw | ipwfw		\
 	dependency	"ipw_ibss.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"ipw_ibss.fwo"
 ipw_ibss.fw			optional ipwibssfw | ipwfw		\
 	dependency	"$S/contrib/dev/ipw/ipw2100-1.3-i.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"ipw_ibss.fw"
 ipwmonitorfw.c			optional ipwmonitorfw | ipwfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk ipw_monitor.fw:ipw_monitor:130 -lintel_ipw -mipw_monitor -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"ipwmonitorfw.c"
 ipw_monitor.fwo			optional ipwmonitorfw | ipwfw		\
 	dependency	"ipw_monitor.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"ipw_monitor.fwo"
 ipw_monitor.fw			optional ipwmonitorfw | ipwfw		\
 	dependency	"$S/contrib/dev/ipw/ipw2100-1.3-p.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"ipw_monitor.fw"
 dev/iscsi/icl.c			optional iscsi | ctl 
 dev/iscsi/icl_conn_if.m		optional iscsi | ctl 
 dev/iscsi/icl_proxy.c		optional iscsi | ctl
 dev/iscsi/icl_soft.c		optional iscsi | ctl 
 dev/iscsi/iscsi.c		optional iscsi scbus
 dev/iscsi_initiator/iscsi.c	optional iscsi_initiator scbus
 dev/iscsi_initiator/iscsi_subr.c	optional iscsi_initiator scbus
 dev/iscsi_initiator/isc_cam.c	optional iscsi_initiator scbus
 dev/iscsi_initiator/isc_soc.c	optional iscsi_initiator scbus
 dev/iscsi_initiator/isc_sm.c	optional iscsi_initiator scbus
 dev/iscsi_initiator/isc_subr.c	optional iscsi_initiator scbus
 dev/ismt/ismt.c			optional ismt
 dev/isl/isl.c			optional isl smbus
 dev/isp/isp.c			optional isp
 dev/isp/isp_freebsd.c		optional isp
 dev/isp/isp_library.c		optional isp
 dev/isp/isp_pci.c		optional isp pci
 dev/isp/isp_sbus.c		optional isp sbus
 dev/isp/isp_target.c		optional isp
 dev/ispfw/ispfw.c		optional ispfw
 dev/iwi/if_iwi.c		optional iwi
 iwibssfw.c			optional iwibssfw | iwifw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwi_bss.fw:iwi_bss:300 -lintel_iwi -miwi_bss -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwibssfw.c"
 iwi_bss.fwo			optional iwibssfw | iwifw		\
 	dependency	"iwi_bss.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwi_bss.fwo"
 iwi_bss.fw			optional iwibssfw | iwifw		\
 	dependency	"$S/contrib/dev/iwi/ipw2200-bss.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwi_bss.fw"
 iwiibssfw.c			optional iwiibssfw | iwifw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwi_ibss.fw:iwi_ibss:300 -lintel_iwi -miwi_ibss -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwiibssfw.c"
 iwi_ibss.fwo			optional iwiibssfw | iwifw		\
 	dependency	"iwi_ibss.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwi_ibss.fwo"
 iwi_ibss.fw			optional iwiibssfw | iwifw		\
 	dependency	"$S/contrib/dev/iwi/ipw2200-ibss.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwi_ibss.fw"
 iwimonitorfw.c			optional iwimonitorfw | iwifw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwi_monitor.fw:iwi_monitor:300 -lintel_iwi -miwi_monitor -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwimonitorfw.c"
 iwi_monitor.fwo			optional iwimonitorfw | iwifw		\
 	dependency	"iwi_monitor.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwi_monitor.fwo"
 iwi_monitor.fw			optional iwimonitorfw | iwifw		\
 	dependency	"$S/contrib/dev/iwi/ipw2200-sniffer.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwi_monitor.fw"
 dev/iwm/if_iwm.c		optional iwm
 dev/iwm/if_iwm_binding.c	optional iwm
 dev/iwm/if_iwm_mac_ctxt.c	optional iwm
 dev/iwm/if_iwm_pcie_trans.c	optional iwm
 dev/iwm/if_iwm_phy_ctxt.c	optional iwm
 dev/iwm/if_iwm_phy_db.c		optional iwm
 dev/iwm/if_iwm_power.c		optional iwm
 dev/iwm/if_iwm_scan.c		optional iwm
 dev/iwm/if_iwm_time_event.c	optional iwm
 dev/iwm/if_iwm_util.c		optional iwm
 iwm3160fw.c			optional iwm3160fw | iwmfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwm3160.fw:iwm3160fw -miwm3160fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwm3160fw.c"
 iwm3160fw.fwo			optional iwm3160fw | iwmfw		\
 	dependency	"iwm3160.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwm3160fw.fwo"
 iwm3160.fw			optional iwm3160fw | iwmfw		\
 	dependency	"$S/contrib/dev/iwm/iwm-3160-9.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwm3160.fw"
 iwm7260fw.c			optional iwm7260fw | iwmfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwm7260.fw:iwm7260fw -miwm7260fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwm7260fw.c"
 iwm7260fw.fwo			optional iwm7260fw | iwmfw		\
 	dependency	"iwm7260.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwm7260fw.fwo"
 iwm7260.fw			optional iwm7260fw | iwmfw		\
 	dependency	"$S/contrib/dev/iwm/iwm-7260-9.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwm7260.fw"
 iwm7265fw.c			optional iwm7265fw | iwmfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwm7265.fw:iwm7265fw -miwm7265fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwm7265fw.c"
 iwm7265fw.fwo			optional iwm7265fw | iwmfw		\
 	dependency	"iwm7265.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwm7265fw.fwo"
 iwm7265.fw			optional iwm7265fw | iwmfw		\
 	dependency	"$S/contrib/dev/iwm/iwm-7265-9.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwm7265.fw"
 dev/iwn/if_iwn.c		optional iwn
 iwn1000fw.c			optional iwn1000fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn1000.fw:iwn1000fw -miwn1000fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn1000fw.c"
 iwn1000fw.fwo			optional iwn1000fw | iwnfw		\
 	dependency	"iwn1000.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn1000fw.fwo"
 iwn1000.fw			optional iwn1000fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-1000-39.31.5.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn1000.fw"
 iwn100fw.c			optional iwn100fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn100.fw:iwn100fw -miwn100fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn100fw.c"
 iwn100fw.fwo			optional iwn100fw | iwnfw		\
 	dependency	"iwn100.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn100fw.fwo"
 iwn100.fw			optional iwn100fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-100-39.31.5.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn100.fw"
 iwn105fw.c			optional iwn105fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn105.fw:iwn105fw -miwn105fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn105fw.c"
 iwn105fw.fwo			optional iwn105fw | iwnfw		\
 	dependency	"iwn105.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn105fw.fwo"
 iwn105.fw			optional iwn105fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-105-6-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn105.fw"
 iwn135fw.c			optional iwn135fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn135.fw:iwn135fw -miwn135fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn135fw.c"
 iwn135fw.fwo			optional iwn135fw | iwnfw		\
 	dependency	"iwn135.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn135fw.fwo"
 iwn135.fw			optional iwn135fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-135-6-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn135.fw"
 iwn2000fw.c			optional iwn2000fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn2000.fw:iwn2000fw -miwn2000fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn2000fw.c"
 iwn2000fw.fwo			optional iwn2000fw | iwnfw		\
 	dependency	"iwn2000.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn2000fw.fwo"
 iwn2000.fw			optional iwn2000fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-2000-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn2000.fw"
 iwn2030fw.c			optional iwn2030fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn2030.fw:iwn2030fw -miwn2030fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn2030fw.c"
 iwn2030fw.fwo			optional iwn2030fw | iwnfw		\
 	dependency	"iwn2030.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn2030fw.fwo"
 # NOTE(review): this image path starts with "iwnwifi-" while every other
 # iwn image in this list starts with "iwlwifi-".  The path must match the
 # file name under $S/contrib/dev/iwn, so verify the on-disk name before
 # "correcting" the spelling here.
 iwn2030.fw			optional iwn2030fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwnwifi-2030-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn2030.fw"
 iwn4965fw.c			optional iwn4965fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn4965.fw:iwn4965fw -miwn4965fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn4965fw.c"
 iwn4965fw.fwo			optional iwn4965fw | iwnfw		\
 	dependency	"iwn4965.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn4965fw.fwo"
 iwn4965.fw			optional iwn4965fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-4965-228.61.2.24.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn4965.fw"
 iwn5000fw.c			optional iwn5000fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn5000.fw:iwn5000fw -miwn5000fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn5000fw.c"
 iwn5000fw.fwo		optional iwn5000fw | iwnfw			\
 	dependency	"iwn5000.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn5000fw.fwo"
 iwn5000.fw			optional iwn5000fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-5000-8.83.5.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn5000.fw"
 iwn5150fw.c			optional iwn5150fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn5150.fw:iwn5150fw -miwn5150fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn5150fw.c"
 iwn5150fw.fwo			optional iwn5150fw | iwnfw		\
 	dependency	"iwn5150.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn5150fw.fwo"
 iwn5150.fw			optional iwn5150fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-5150-8.24.2.2.fw.uu"\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn5150.fw"
 iwn6000fw.c			optional iwn6000fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn6000.fw:iwn6000fw -miwn6000fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn6000fw.c"
 iwn6000fw.fwo			optional iwn6000fw | iwnfw		\
 	dependency	"iwn6000.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn6000fw.fwo"
 iwn6000.fw			optional iwn6000fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-6000-9.221.4.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn6000.fw"
 iwn6000g2afw.c			optional iwn6000g2afw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn6000g2a.fw:iwn6000g2afw -miwn6000g2afw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn6000g2afw.c"
 iwn6000g2afw.fwo		optional iwn6000g2afw | iwnfw		\
 	dependency	"iwn6000g2a.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn6000g2afw.fwo"
 iwn6000g2a.fw			optional iwn6000g2afw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-6000g2a-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn6000g2a.fw"
 iwn6000g2bfw.c			optional iwn6000g2bfw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn6000g2b.fw:iwn6000g2bfw -miwn6000g2bfw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn6000g2bfw.c"
 iwn6000g2bfw.fwo		optional iwn6000g2bfw | iwnfw		\
 	dependency	"iwn6000g2b.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn6000g2bfw.fwo"
 iwn6000g2b.fw			optional iwn6000g2bfw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-6000g2b-18.168.6.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn6000g2b.fw"
 iwn6050fw.c			optional iwn6050fw | iwnfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk iwn6050.fw:iwn6050fw -miwn6050fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"iwn6050fw.c"
 iwn6050fw.fwo			optional iwn6050fw | iwnfw		\
 	dependency	"iwn6050.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"iwn6050fw.fwo"
 iwn6050.fw			optional iwn6050fw | iwnfw		\
 	dependency	"$S/contrib/dev/iwn/iwlwifi-6050-41.28.5.1.fw.uu" \
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"iwn6050.fw"
 dev/ixgb/if_ixgb.c		optional ixgb
 dev/ixgb/ixgb_ee.c		optional ixgb
 dev/ixgb/ixgb_hw.c		optional ixgb
 dev/ixgbe/if_ix.c		optional ix inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP"
 dev/ixgbe/if_ixv.c		optional ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe -DSMP"
 dev/ixgbe/ix_txrx.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_osdep.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_phy.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_api.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_common.c	optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_mbx.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_vf.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_82598.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_82599.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_x540.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_x550.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_dcb.c		optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_dcb_82598.c	optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/ixgbe/ixgbe_dcb_82599.c	optional ix inet | ixv inet \
 	compile-with "${NORMAL_C} -I$S/dev/ixgbe"
 dev/jme/if_jme.c		optional jme pci
 dev/joy/joy.c			optional joy
 dev/joy/joy_isa.c		optional joy isa
 dev/kbd/kbd.c			optional atkbd | pckbd | sc | ukbd | vt
 dev/kbdmux/kbdmux.c		optional kbdmux
 dev/ksyms/ksyms.c		optional ksyms
 dev/le/am7990.c			optional le
 dev/le/am79900.c		optional le
 dev/le/if_le_pci.c		optional le pci
 dev/le/lance.c			optional le
 dev/led/led.c			standard
 dev/lge/if_lge.c		optional lge
 dev/lmc/if_lmc.c		optional lmc
 dev/malo/if_malo.c		optional malo
 dev/malo/if_malohal.c		optional malo
 dev/malo/if_malo_pci.c		optional malo pci
 dev/mc146818/mc146818.c		optional mc146818
 dev/mca/mca_bus.c		optional mca
 dev/mcd/mcd.c			optional mcd isa nowerror
 dev/mcd/mcd_isa.c		optional mcd isa nowerror
 dev/md/md.c			optional md
 dev/mdio/mdio_if.m		optional miiproxy | mdio
 dev/mdio/mdio.c			optional miiproxy | mdio
 dev/mem/memdev.c		optional mem
 dev/mem/memutil.c		optional mem
 dev/mfi/mfi.c			optional mfi
 dev/mfi/mfi_debug.c		optional mfi
 dev/mfi/mfi_pci.c		optional mfi pci
 dev/mfi/mfi_disk.c		optional mfi
 dev/mfi/mfi_syspd.c		optional mfi
 dev/mfi/mfi_tbolt.c		optional mfi
 dev/mfi/mfi_linux.c		optional mfi compat_linux
 dev/mfi/mfi_cam.c		optional mfip scbus
 dev/mii/acphy.c			optional miibus | acphy
 dev/mii/amphy.c			optional miibus | amphy
 dev/mii/atphy.c			optional miibus | atphy
 dev/mii/axphy.c			optional miibus | axphy
 dev/mii/bmtphy.c		optional miibus | bmtphy
 dev/mii/brgphy.c		optional miibus | brgphy
 dev/mii/ciphy.c			optional miibus | ciphy
 dev/mii/e1000phy.c		optional miibus | e1000phy
 dev/mii/gentbi.c		optional miibus | gentbi
 dev/mii/icsphy.c		optional miibus | icsphy
 dev/mii/ip1000phy.c		optional miibus | ip1000phy
 dev/mii/jmphy.c			optional miibus | jmphy
 dev/mii/lxtphy.c		optional miibus | lxtphy
 dev/mii/mii.c			optional miibus | mii
 dev/mii/mii_bitbang.c		optional miibus | mii_bitbang
 dev/mii/mii_physubr.c		optional miibus | mii
 dev/mii/miibus_if.m		optional miibus | mii
 dev/mii/mlphy.c			optional miibus | mlphy
 dev/mii/nsgphy.c		optional miibus | nsgphy
 dev/mii/nsphy.c			optional miibus | nsphy
 dev/mii/nsphyter.c		optional miibus | nsphyter
 dev/mii/pnaphy.c		optional miibus | pnaphy
 dev/mii/qsphy.c			optional miibus | qsphy
 dev/mii/rdcphy.c		optional miibus | rdcphy
 dev/mii/rgephy.c		optional miibus | rgephy
 dev/mii/rlphy.c			optional miibus | rlphy
 dev/mii/rlswitch.c		optional rlswitch
 dev/mii/smcphy.c		optional miibus | smcphy
 dev/mii/smscphy.c		optional miibus | smscphy
 dev/mii/tdkphy.c		optional miibus | tdkphy
 dev/mii/tlphy.c			optional miibus | tlphy
 dev/mii/truephy.c		optional miibus | truephy
 dev/mii/ukphy.c			optional miibus | mii
 dev/mii/ukphy_subr.c		optional miibus | mii
 dev/mii/xmphy.c			optional miibus | xmphy
 dev/mk48txx/mk48txx.c		optional mk48txx
 dev/mlx/mlx.c			optional mlx
 dev/mlx/mlx_disk.c		optional mlx
 dev/mlx/mlx_pci.c		optional mlx pci
 dev/mly/mly.c			optional mly
 dev/mmc/mmc.c			optional mmc
 dev/mmc/mmcbr_if.m		standard
 dev/mmc/mmcbus_if.m		standard
 dev/mmc/mmcsd.c			optional mmcsd
 dev/mn/if_mn.c			optional mn pci
 dev/mpr/mpr.c			optional mpr
 dev/mpr/mpr_config.c		optional mpr
 # XXX Work around clang warning, until maintainer approves fix.
 dev/mpr/mpr_mapping.c		optional mpr \
 	compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}"
 dev/mpr/mpr_pci.c		optional mpr pci
 dev/mpr/mpr_sas.c		optional mpr \
 	compile-with "${NORMAL_C} ${NO_WUNNEEDED_INTERNAL_DECL}"
 dev/mpr/mpr_sas_lsi.c		optional mpr
 dev/mpr/mpr_table.c		optional mpr
 dev/mpr/mpr_user.c		optional mpr
 dev/mps/mps.c			optional mps
 dev/mps/mps_config.c		optional mps
 # XXX Work around clang warning, until maintainer approves fix.
 dev/mps/mps_mapping.c		optional mps \
 	compile-with "${NORMAL_C} ${NO_WSOMETIMES_UNINITIALIZED}"
 dev/mps/mps_pci.c		optional mps pci
 dev/mps/mps_sas.c		optional mps \
 	compile-with "${NORMAL_C} ${NO_WUNNEEDED_INTERNAL_DECL}"
 dev/mps/mps_sas_lsi.c		optional mps
 dev/mps/mps_table.c		optional mps
 dev/mps/mps_user.c		optional mps
 dev/mpt/mpt.c			optional mpt
 dev/mpt/mpt_cam.c		optional mpt
 dev/mpt/mpt_debug.c		optional mpt
 dev/mpt/mpt_pci.c		optional mpt pci
 dev/mpt/mpt_raid.c		optional mpt
 dev/mpt/mpt_user.c		optional mpt
 dev/mrsas/mrsas.c		optional mrsas
 dev/mrsas/mrsas_cam.c		optional mrsas
 dev/mrsas/mrsas_ioctl.c		optional mrsas
 dev/mrsas/mrsas_fp.c		optional mrsas
 dev/msk/if_msk.c		optional msk
 dev/mvs/mvs.c			optional mvs
 dev/mvs/mvs_if.m		optional mvs
 dev/mvs/mvs_pci.c		optional mvs pci
 dev/mwl/if_mwl.c		optional mwl
 dev/mwl/if_mwl_pci.c		optional mwl pci
 dev/mwl/mwlhal.c		optional mwl
 mwlfw.c				optional mwlfw				\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk mw88W8363.fw:mw88W8363fw mwlboot.fw:mwlboot -mmwl -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"mwlfw.c"
 mw88W8363.fwo		optional mwlfw					\
 	dependency	"mw88W8363.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"mw88W8363.fwo"
 mw88W8363.fw		optional mwlfw					\
 	dependency	"$S/contrib/dev/mwl/mw88W8363.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"mw88W8363.fw"
 mwlboot.fwo		optional mwlfw					\
 	dependency	"mwlboot.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"mwlboot.fwo"
 mwlboot.fw		optional mwlfw					\
 	dependency	"$S/contrib/dev/mwl/mwlboot.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"mwlboot.fw"
 dev/mxge/if_mxge.c		optional mxge pci
 dev/mxge/mxge_eth_z8e.c		optional mxge pci
 dev/mxge/mxge_ethp_z8e.c	optional mxge pci
 dev/mxge/mxge_rss_eth_z8e.c	optional mxge pci
 dev/mxge/mxge_rss_ethp_z8e.c	optional mxge pci
 dev/my/if_my.c			optional my
 dev/nand/nand.c			optional nand
 dev/nand/nand_bbt.c		optional nand
 dev/nand/nand_cdev.c		optional nand
 dev/nand/nand_generic.c		optional nand
 dev/nand/nand_geom.c		optional nand
 dev/nand/nand_id.c		optional nand
 dev/nand/nandbus.c		optional nand
 dev/nand/nandbus_if.m		optional nand
 dev/nand/nand_if.m		optional nand
 dev/nand/nandsim.c		optional nandsim nand
 dev/nand/nandsim_chip.c		optional nandsim nand
 dev/nand/nandsim_ctrl.c		optional nandsim nand
 dev/nand/nandsim_log.c		optional nandsim nand
 dev/nand/nandsim_swap.c		optional nandsim nand
 dev/nand/nfc_if.m		optional nand
 dev/ncr/ncr.c			optional ncr pci
 dev/ncv/ncr53c500.c		optional ncv
 dev/ncv/ncr53c500_pccard.c	optional ncv pccard
 dev/netmap/netmap.c		optional netmap
 dev/netmap/netmap_freebsd.c	optional netmap
 dev/netmap/netmap_generic.c	optional netmap
 dev/netmap/netmap_mbq.c		optional netmap
 dev/netmap/netmap_mem2.c	optional netmap
 dev/netmap/netmap_monitor.c	optional netmap
 dev/netmap/netmap_offloadings.c	optional netmap
 dev/netmap/netmap_pipe.c	optional netmap
 dev/netmap/netmap_vale.c	optional netmap
 # compile-with "${NORMAL_C} -Wconversion -Wextra"
 dev/nfsmb/nfsmb.c		optional nfsmb pci
 dev/nge/if_nge.c		optional nge
 dev/nxge/if_nxge.c		optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-device.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-mm.c	optional nxge
 dev/nxge/xgehal/xge-queue.c	optional nxge
 dev/nxge/xgehal/xgehal-driver.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-ring.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-channel.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-fifo.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-stats.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nxge/xgehal/xgehal-config.c	optional nxge
 dev/nxge/xgehal/xgehal-mgmt.c	optional nxge \
 	compile-with "${NORMAL_C} ${NO_WSELF_ASSIGN}"
 dev/nmdm/nmdm.c			optional nmdm
 dev/nsp/nsp.c			optional nsp
 dev/nsp/nsp_pccard.c		optional nsp pccard
 dev/null/null.c			standard
 dev/oce/oce_hw.c		optional oce pci
 dev/oce/oce_if.c		optional oce pci
 dev/oce/oce_mbox.c		optional oce pci
 dev/oce/oce_queue.c		optional oce pci
 dev/oce/oce_sysctl.c		optional oce pci
 dev/oce/oce_util.c		optional oce pci
 dev/ofw/ofw_bus_if.m		optional fdt
 dev/ofw/ofw_bus_subr.c		optional fdt
 dev/ofw/ofw_fdt.c		optional fdt
 dev/ofw/ofw_if.m		optional fdt
 dev/ofw/ofw_iicbus.c		optional fdt iicbus
 dev/ofw/ofw_subr.c		optional fdt
 dev/ofw/ofwbus.c		optional fdt
 dev/ofw/openfirm.c		optional fdt
 dev/ofw/openfirmio.c		optional fdt
 dev/ow/ow.c			optional ow				\
 	dependency	"owll_if.h"					\
 	dependency	"own_if.h"
 dev/ow/owll_if.m		optional ow
 dev/ow/own_if.m			optional ow
 dev/ow/ow_temp.c		optional ow_temp
 dev/ow/owc_gpiobus.c		optional owc gpio
 dev/patm/if_patm.c		optional patm pci
 dev/patm/if_patm_attach.c	optional patm pci
 dev/patm/if_patm_intr.c		optional patm pci
 dev/patm/if_patm_ioctl.c	optional patm pci
 dev/patm/if_patm_rtables.c	optional patm pci
 dev/patm/if_patm_rx.c		optional patm pci
 dev/patm/if_patm_tx.c		optional patm pci
 dev/pbio/pbio.c			optional pbio isa
 dev/pccard/card_if.m		standard
 dev/pccard/pccard.c		optional pccard
 dev/pccard/pccard_cis.c		optional pccard
 dev/pccard/pccard_cis_quirks.c	optional pccard
 dev/pccard/pccard_device.c	optional pccard
 dev/pccard/power_if.m		standard
 dev/pccbb/pccbb.c		optional cbb
 dev/pccbb/pccbb_isa.c		optional cbb isa
 dev/pccbb/pccbb_pci.c		optional cbb pci
 dev/pcf/pcf.c			optional pcf
 dev/pci/eisa_pci.c		optional pci eisa
 dev/pci/fixup_pci.c		optional pci
 dev/pci/hostb_pci.c		optional pci
 dev/pci/ignore_pci.c		optional pci
 dev/pci/isa_pci.c		optional pci isa
 dev/pci/pci.c			optional pci
 dev/pci/pci_if.m		standard
 dev/pci/pci_iov.c		optional pci pci_iov
 dev/pci/pci_iov_if.m		standard
 dev/pci/pci_iov_schema.c	optional pci pci_iov
 dev/pci/pci_pci.c		optional pci
 dev/pci/pci_subr.c		optional pci
 dev/pci/pci_user.c		optional pci
 dev/pci/pcib_if.m		standard
 dev/pci/pcib_support.c		standard
 dev/pci/vga_pci.c		optional pci
 dev/pcn/if_pcn.c		optional pcn pci
 dev/pdq/if_fea.c		optional fea eisa
 dev/pdq/if_fpa.c		optional fpa pci
 dev/pdq/pdq.c			optional nowerror fea eisa | fpa pci
 dev/pdq/pdq_ifsubr.c		optional nowerror fea eisa | fpa pci
 dev/pms/freebsd/driver/ini/src/agtiapi.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/sadisc.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/mpi.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/saframe.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/sahw.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/sainit.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/saint.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/sampicmd.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/sampirsp.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/saphy.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/saport.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/sasata.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/sasmp.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/sassp.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/satimer.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/sautil.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/saioctlcmd.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sallsdk/spc/mpidebug.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/discovery/dm/dminit.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/discovery/dm/dmsmp.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/discovery/dm/dmdisc.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/discovery/dm/dmport.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/discovery/dm/dmtimer.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/discovery/dm/dmmisc.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sat/src/sminit.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sat/src/smmisc.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sat/src/smsat.c				optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sat/src/smsatcb.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sat/src/smsathw.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/sat/src/smtimer.c			optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/tdinit.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/tdmisc.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/tdesgl.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/tdport.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/tdint.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/tdioctl.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/tdhw.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/ossacmnapi.c	optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/tddmcmnapi.c	optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/tdsmcmnapi.c	optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/common/tdtimers.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/sas/ini/itdio.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/sas/ini/itdcb.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/sas/ini/itdinit.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/sas/ini/itddisc.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/sata/host/sat.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/sata/host/ossasat.c	optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/pms/RefTisa/tisa/sassata/sata/host/sathw.c		optional pmspcv \
 	compile-with "${NORMAL_C} -Wunused-variable -Woverflow -Wparentheses -w"
 dev/ppbus/if_plip.c		optional plip
 dev/ppbus/immio.c		optional vpo
 dev/ppbus/lpbb.c		optional lpbb
 dev/ppbus/lpt.c			optional lpt
 dev/ppbus/pcfclock.c		optional pcfclock
 dev/ppbus/ppb_1284.c		optional ppbus
 dev/ppbus/ppb_base.c		optional ppbus
 dev/ppbus/ppb_msq.c		optional ppbus
 dev/ppbus/ppbconf.c		optional ppbus
 dev/ppbus/ppbus_if.m		optional ppbus
 dev/ppbus/ppi.c			optional ppi
 dev/ppbus/pps.c			optional pps
 dev/ppbus/vpo.c			optional vpo
 dev/ppbus/vpoio.c		optional vpo
 dev/ppc/ppc.c			optional ppc
 dev/ppc/ppc_acpi.c		optional ppc acpi
 dev/ppc/ppc_isa.c		optional ppc isa
 dev/ppc/ppc_pci.c		optional ppc pci
 dev/ppc/ppc_puc.c		optional ppc puc
 dev/proto/proto_bus_isa.c	optional proto acpi | proto isa
 dev/proto/proto_bus_pci.c	optional proto pci
 dev/proto/proto_busdma.c	optional proto
 dev/proto/proto_core.c		optional proto
 dev/pst/pst-iop.c		optional pst
 dev/pst/pst-pci.c		optional pst pci
 dev/pst/pst-raid.c		optional pst
 dev/pty/pty.c			optional pty
 dev/puc/puc.c			optional puc
 dev/puc/puc_cfg.c		optional puc
 dev/puc/puc_pccard.c		optional puc pccard
 dev/puc/puc_pci.c		optional puc pci
 dev/puc/pucdata.c		optional puc pci
 dev/quicc/quicc_core.c		optional quicc
 dev/ral/rt2560.c		optional ral
 dev/ral/rt2661.c		optional ral
 dev/ral/rt2860.c		optional ral
 dev/ral/if_ral_pci.c		optional ral pci
 rt2561fw.c			optional rt2561fw | ralfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk rt2561.fw:rt2561fw -mrt2561 -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"rt2561fw.c"
 rt2561fw.fwo			optional rt2561fw | ralfw		\
 	dependency	"rt2561.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"rt2561fw.fwo"
 rt2561.fw			optional rt2561fw | ralfw		\
 	dependency	"$S/contrib/dev/ral/rt2561.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"rt2561.fw"
 rt2561sfw.c			optional rt2561sfw | ralfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk rt2561s.fw:rt2561sfw -mrt2561s -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"rt2561sfw.c"
 rt2561sfw.fwo			optional rt2561sfw | ralfw		\
 	dependency	"rt2561s.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"rt2561sfw.fwo"
 rt2561s.fw			optional rt2561sfw | ralfw		\
 	dependency	"$S/contrib/dev/ral/rt2561s.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"rt2561s.fw"
 rt2661fw.c			optional rt2661fw | ralfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk rt2661.fw:rt2661fw -mrt2661 -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"rt2661fw.c"
 rt2661fw.fwo			optional rt2661fw | ralfw		\
 	dependency	"rt2661.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"rt2661fw.fwo"
 rt2661.fw			optional rt2661fw | ralfw		\
 	dependency	"$S/contrib/dev/ral/rt2661.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"rt2661.fw"
 rt2860fw.c			optional rt2860fw | ralfw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk rt2860.fw:rt2860fw -mrt2860 -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"rt2860fw.c"
 rt2860fw.fwo			optional rt2860fw | ralfw		\
 	dependency	"rt2860.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"rt2860fw.fwo"
 rt2860.fw			optional rt2860fw | ralfw		\
 	dependency	"$S/contrib/dev/ral/rt2860.fw.uu"		\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"rt2860.fw"
 dev/random/random_infra.c	optional random
 dev/random/random_harvestq.c	optional random
 dev/random/randomdev.c		optional random random_yarrow | \
 					 random !random_yarrow !random_loadable
 dev/random/yarrow.c		optional random random_yarrow
 dev/random/fortuna.c		optional random !random_yarrow !random_loadable
 dev/random/hash.c		optional random random_yarrow | \
 					 random !random_yarrow !random_loadable
 dev/rc/rc.c			optional rc
 dev/rccgpio/rccgpio.c		optional rccgpio gpio
 dev/re/if_re.c			optional re
 dev/rl/if_rl.c			optional rl pci
 dev/rndtest/rndtest.c		optional rndtest
 dev/rp/rp.c			optional rp
 dev/rp/rp_isa.c			optional rp isa
 dev/rp/rp_pci.c			optional rp pci
 dev/safe/safe.c			optional safe
 dev/scc/scc_if.m		optional scc
 dev/scc/scc_bfe_ebus.c		optional scc ebus
 dev/scc/scc_bfe_quicc.c		optional scc quicc
 dev/scc/scc_bfe_sbus.c		optional scc fhc | scc sbus
 dev/scc/scc_core.c		optional scc
 dev/scc/scc_dev_quicc.c		optional scc quicc
 dev/scc/scc_dev_sab82532.c	optional scc
 dev/scc/scc_dev_z8530.c		optional scc
 dev/scd/scd.c			optional scd isa
 dev/scd/scd_isa.c		optional scd isa
 dev/sdhci/sdhci.c		optional sdhci
 dev/sdhci/sdhci_if.m		optional sdhci
 dev/sdhci/sdhci_pci.c		optional sdhci pci
 dev/sf/if_sf.c			optional sf pci
 dev/sge/if_sge.c		optional sge pci
 dev/si/si.c			optional si
 dev/si/si2_z280.c		optional si
 dev/si/si3_t225.c		optional si
 dev/si/si_eisa.c		optional si eisa
 dev/si/si_isa.c			optional si isa
 dev/si/si_pci.c			optional si pci
 dev/siba/siba.c			optional siba
 dev/siba/siba_bwn.c		optional siba_bwn pci
 dev/siba/siba_cc.c		optional siba
 dev/siba/siba_core.c		optional siba | siba_bwn pci
 dev/siba/siba_pcib.c		optional siba pci
 dev/siis/siis.c			optional siis pci
 dev/sis/if_sis.c		optional sis pci
 dev/sk/if_sk.c			optional sk pci
 dev/smbus/smb.c			optional smb
 dev/smbus/smbconf.c		optional smbus
 dev/smbus/smbus.c		optional smbus
 dev/smbus/smbus_if.m		optional smbus
 dev/smc/if_smc.c		optional smc
 dev/smc/if_smc_fdt.c		optional smc fdt
 dev/sn/if_sn.c			optional sn
 dev/sn/if_sn_isa.c		optional sn isa
 dev/sn/if_sn_pccard.c		optional sn pccard
 dev/snp/snp.c			optional snp
 dev/sound/clone.c		optional sound
 dev/sound/unit.c		optional sound
 dev/sound/isa/ad1816.c		optional snd_ad1816 isa
 dev/sound/isa/ess.c		optional snd_ess isa
 dev/sound/isa/gusc.c		optional snd_gusc isa
 dev/sound/isa/mss.c		optional snd_mss isa
 dev/sound/isa/sb16.c		optional snd_sb16 isa
 dev/sound/isa/sb8.c		optional snd_sb8 isa
 dev/sound/isa/sbc.c		optional snd_sbc isa
 dev/sound/isa/sndbuf_dma.c	optional sound isa
 dev/sound/pci/als4000.c		optional snd_als4000 pci
 dev/sound/pci/atiixp.c		optional snd_atiixp pci
 dev/sound/pci/cmi.c		optional snd_cmi pci
 dev/sound/pci/cs4281.c		optional snd_cs4281 pci
 dev/sound/pci/csa.c		optional snd_csa pci
 dev/sound/pci/csapcm.c		optional snd_csa pci
 dev/sound/pci/ds1.c		optional snd_ds1 pci
 dev/sound/pci/emu10k1.c		optional snd_emu10k1 pci
 dev/sound/pci/emu10kx.c		optional snd_emu10kx pci
 dev/sound/pci/emu10kx-pcm.c	optional snd_emu10kx pci
 dev/sound/pci/emu10kx-midi.c	optional snd_emu10kx pci
 dev/sound/pci/envy24.c		optional snd_envy24 pci
 dev/sound/pci/envy24ht.c	optional snd_envy24ht pci
 dev/sound/pci/es137x.c		optional snd_es137x pci
 dev/sound/pci/fm801.c		optional snd_fm801 pci
 dev/sound/pci/ich.c		optional snd_ich pci
 dev/sound/pci/maestro.c		optional snd_maestro pci
 dev/sound/pci/maestro3.c	optional snd_maestro3 pci
 dev/sound/pci/neomagic.c	optional snd_neomagic pci
 dev/sound/pci/solo.c		optional snd_solo pci
 dev/sound/pci/spicds.c		optional snd_spicds pci
 dev/sound/pci/t4dwave.c		optional snd_t4dwave pci
 dev/sound/pci/via8233.c		optional snd_via8233 pci
 dev/sound/pci/via82c686.c	optional snd_via82c686 pci
 dev/sound/pci/vibes.c		optional snd_vibes pci
 dev/sound/pci/hda/hdaa.c	optional snd_hda pci
 dev/sound/pci/hda/hdaa_patches.c	optional snd_hda pci
 dev/sound/pci/hda/hdac.c	optional snd_hda pci
 dev/sound/pci/hda/hdac_if.m	optional snd_hda pci
 dev/sound/pci/hda/hdacc.c	optional snd_hda pci
 dev/sound/pci/hdspe.c		optional snd_hdspe pci
 dev/sound/pci/hdspe-pcm.c	optional snd_hdspe pci
 dev/sound/pcm/ac97.c		optional sound
 dev/sound/pcm/ac97_if.m		optional sound
 dev/sound/pcm/ac97_patch.c	optional sound
 dev/sound/pcm/buffer.c		optional sound	\
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/channel.c		optional sound
 dev/sound/pcm/channel_if.m	optional sound
 dev/sound/pcm/dsp.c		optional sound
 dev/sound/pcm/feeder.c		optional sound
 dev/sound/pcm/feeder_chain.c	optional sound
 dev/sound/pcm/feeder_eq.c	optional sound	\
 	dependency	"feeder_eq_gen.h"	\
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/feeder_if.m	optional sound
 dev/sound/pcm/feeder_format.c	optional sound  \
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/feeder_matrix.c	optional sound  \
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/feeder_mixer.c	optional sound  \
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/feeder_rate.c	optional sound	\
 	dependency	"feeder_rate_gen.h"	\
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/feeder_volume.c	optional sound  \
 	dependency	"snd_fxdiv_gen.h"
 dev/sound/pcm/mixer.c		optional sound
 dev/sound/pcm/mixer_if.m	optional sound
 dev/sound/pcm/sndstat.c		optional sound
 dev/sound/pcm/sound.c		optional sound
 dev/sound/pcm/vchan.c		optional sound
 dev/sound/usb/uaudio.c		optional snd_uaudio usb
 dev/sound/usb/uaudio_pcm.c	optional snd_uaudio usb
 dev/sound/midi/midi.c		optional sound
 dev/sound/midi/mpu401.c		optional sound
 dev/sound/midi/mpu_if.m		optional sound
 dev/sound/midi/mpufoi_if.m	optional sound
 dev/sound/midi/sequencer.c	optional sound
 dev/sound/midi/synth_if.m	optional sound
 dev/spibus/ofw_spibus.c		optional fdt spibus
 dev/spibus/spibus.c		optional spibus				\
 	dependency	"spibus_if.h"
 dev/spibus/spibus_if.m		optional spibus
 dev/ste/if_ste.c		optional ste pci
 dev/stg/tmc18c30.c		optional stg
 dev/stg/tmc18c30_isa.c		optional stg isa
 dev/stg/tmc18c30_pccard.c	optional stg pccard
 dev/stg/tmc18c30_pci.c		optional stg pci
 dev/stg/tmc18c30_subr.c		optional stg
 dev/stge/if_stge.c		optional stge
 dev/streams/streams.c		optional streams
 dev/sym/sym_hipd.c		optional sym				\
 	dependency	"$S/dev/sym/sym_{conf,defs}.h"
 dev/syscons/blank/blank_saver.c	optional blank_saver
 dev/syscons/daemon/daemon_saver.c optional daemon_saver
 dev/syscons/dragon/dragon_saver.c optional dragon_saver
 dev/syscons/fade/fade_saver.c	optional fade_saver
 dev/syscons/fire/fire_saver.c	optional fire_saver
 dev/syscons/green/green_saver.c	optional green_saver
 dev/syscons/logo/logo.c		optional logo_saver
 dev/syscons/logo/logo_saver.c	optional logo_saver
 dev/syscons/rain/rain_saver.c	optional rain_saver
 dev/syscons/schistory.c		optional sc
 dev/syscons/scmouse.c		optional sc
 dev/syscons/scterm.c		optional sc
 dev/syscons/scvidctl.c		optional sc
 dev/syscons/snake/snake_saver.c	optional snake_saver
 dev/syscons/star/star_saver.c	optional star_saver
 dev/syscons/syscons.c		optional sc
 dev/syscons/sysmouse.c		optional sc
 dev/syscons/warp/warp_saver.c	optional warp_saver
 dev/tdfx/tdfx_linux.c		optional tdfx_linux tdfx compat_linux
 dev/tdfx/tdfx_pci.c		optional tdfx pci
 dev/ti/if_ti.c			optional ti pci
 dev/tl/if_tl.c			optional tl pci
 dev/trm/trm.c			optional trm
 dev/twa/tw_cl_init.c		optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twa/tw_cl_intr.c		optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twa/tw_cl_io.c		optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twa/tw_cl_misc.c		optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twa/tw_osl_cam.c		optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twa/tw_osl_freebsd.c	optional twa \
 	compile-with "${NORMAL_C} -I$S/dev/twa"
 dev/twe/twe.c			optional twe
 dev/twe/twe_freebsd.c		optional twe
 dev/tws/tws.c			optional tws
 dev/tws/tws_cam.c		optional tws
 dev/tws/tws_hdm.c		optional tws
 dev/tws/tws_services.c		optional tws
 dev/tws/tws_user.c		optional tws
 dev/tx/if_tx.c			optional tx
 dev/txp/if_txp.c		optional txp
 dev/uart/uart_bus_acpi.c	optional uart acpi
 dev/uart/uart_bus_ebus.c	optional uart ebus
 dev/uart/uart_bus_fdt.c		optional uart fdt
 dev/uart/uart_bus_isa.c		optional uart isa
 dev/uart/uart_bus_pccard.c	optional uart pccard
 dev/uart/uart_bus_pci.c		optional uart pci
 dev/uart/uart_bus_puc.c		optional uart puc
 dev/uart/uart_bus_scc.c		optional uart scc
 dev/uart/uart_core.c		optional uart
 dev/uart/uart_dbg.c		optional uart gdb
 dev/uart/uart_dev_ns8250.c	optional uart uart_ns8250
 dev/uart/uart_dev_pl011.c	optional uart pl011
 dev/uart/uart_dev_quicc.c	optional uart quicc
 dev/uart/uart_dev_sab82532.c	optional uart uart_sab82532
 dev/uart/uart_dev_sab82532.c	optional uart scc
 dev/uart/uart_dev_z8530.c	optional uart uart_z8530
 dev/uart/uart_dev_z8530.c	optional uart scc
 dev/uart/uart_if.m		optional uart
 dev/uart/uart_subr.c		optional uart
 dev/uart/uart_tty.c		optional uart
 dev/ubsec/ubsec.c		optional ubsec
 #
 # USB controller drivers
 #
 dev/usb/controller/at91dci.c		optional at91dci
 dev/usb/controller/at91dci_atmelarm.c	optional at91dci at91rm9200
 dev/usb/controller/musb_otg.c		optional musb
 dev/usb/controller/musb_otg_atmelarm.c	optional musb at91rm9200
 dev/usb/controller/dwc_otg.c		optional dwcotg
 dev/usb/controller/dwc_otg_fdt.c	optional dwcotg fdt
 dev/usb/controller/ehci.c		optional ehci
 dev/usb/controller/ehci_pci.c		optional ehci pci
 dev/usb/controller/ohci.c		optional ohci
 dev/usb/controller/ohci_atmelarm.c	optional ohci at91rm9200
 dev/usb/controller/ohci_pci.c		optional ohci pci
 dev/usb/controller/uhci.c		optional uhci
 dev/usb/controller/uhci_pci.c		optional uhci pci
 dev/usb/controller/xhci.c		optional xhci
 dev/usb/controller/xhci_pci.c		optional xhci pci
 dev/usb/controller/saf1761_otg.c	optional saf1761otg
 dev/usb/controller/saf1761_otg_fdt.c	optional saf1761otg fdt
 dev/usb/controller/uss820dci.c		optional uss820dci
 dev/usb/controller/uss820dci_atmelarm.c	optional uss820dci at91rm9200
 dev/usb/controller/usb_controller.c	optional usb
 #
 # USB storage drivers
 #
 dev/usb/storage/umass.c		optional umass
 dev/usb/storage/urio.c		optional urio
 dev/usb/storage/ustorage_fs.c	optional usfs
 #
 # USB core
 #
 dev/usb/usb_busdma.c		optional usb
 dev/usb/usb_core.c		optional usb
 dev/usb/usb_debug.c		optional usb
 dev/usb/usb_dev.c		optional usb
 dev/usb/usb_device.c		optional usb
 dev/usb/usb_dynamic.c		optional usb
 dev/usb/usb_error.c		optional usb
 dev/usb/usb_generic.c		optional usb
 dev/usb/usb_handle_request.c	optional usb
 dev/usb/usb_hid.c		optional usb
 dev/usb/usb_hub.c		optional usb
 dev/usb/usb_if.m		optional usb
 dev/usb/usb_lookup.c		optional usb
 dev/usb/usb_mbuf.c		optional usb
 dev/usb/usb_msctest.c		optional usb
 dev/usb/usb_parse.c		optional usb
 dev/usb/usb_pf.c		optional usb
 dev/usb/usb_process.c		optional usb
 dev/usb/usb_request.c		optional usb
 dev/usb/usb_transfer.c		optional usb
 dev/usb/usb_util.c		optional usb
 #
 # USB network drivers
 #
 dev/usb/net/if_aue.c		optional aue
 dev/usb/net/if_axe.c		optional axe
 dev/usb/net/if_axge.c		optional axge
 dev/usb/net/if_cdce.c		optional cdce
 dev/usb/net/if_cue.c		optional cue
 dev/usb/net/if_ipheth.c		optional ipheth
 dev/usb/net/if_kue.c		optional kue
 dev/usb/net/if_mos.c		optional mos
 dev/usb/net/if_rue.c		optional rue
 dev/usb/net/if_smsc.c		optional smsc
 dev/usb/net/if_udav.c		optional udav
 dev/usb/net/if_ure.c		optional ure
 dev/usb/net/if_usie.c		optional usie
 dev/usb/net/if_urndis.c		optional urndis
 dev/usb/net/ruephy.c		optional rue
 dev/usb/net/usb_ethernet.c	optional uether | aue | axe | axge | cdce | \
 					 cue | ipheth | kue | mos | rue | \
 					 smsc | udav | ure | urndis
 dev/usb/net/uhso.c		optional uhso
 #
 # USB WLAN drivers
 #
 dev/usb/wlan/if_rsu.c		optional rsu
 rsu-rtl8712fw.c			optional rsu-rtl8712fw | rsufw		\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk rsu-rtl8712fw.fw:rsu-rtl8712fw:120 -mrsu-rtl8712fw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"rsu-rtl8712fw.c"
 rsu-rtl8712fw.fwo		optional rsu-rtl8712fw | rsufw		\
 	dependency	"rsu-rtl8712fw.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"rsu-rtl8712fw.fwo"
 rsu-rtl8712fw.fw		optional rsu-rtl8712fw | rsufw		\
 	dependency	"$S/contrib/dev/rsu/rsu-rtl8712fw.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"rsu-rtl8712fw.fw"
 dev/usb/wlan/if_rum.c		optional rum
 dev/usb/wlan/if_run.c		optional run
 runfw.c				optional runfw							\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk run.fw:runfw -mrunfw -c${.TARGET}"	\
 	no-implicit-rule before-depend local							\
 	clean		"runfw.c"
 runfw.fwo			optional runfw							\
 	dependency	"run.fw"								\
 	compile-with	"${NORMAL_FWO}"								\
 	no-implicit-rule									\
 	clean		"runfw.fwo"
 run.fw				optional runfw							\
 	dependency	"$S/contrib/dev/run/rt2870.fw.uu"					\
 	compile-with	"${NORMAL_FW}"								\
 	no-obj no-implicit-rule									\
 	clean		"run.fw"
 dev/usb/wlan/if_uath.c		optional uath
 dev/usb/wlan/if_upgt.c		optional upgt
 dev/usb/wlan/if_ural.c		optional ural
 dev/usb/wlan/if_urtw.c		optional urtw
 dev/usb/wlan/if_urtwn.c		optional urtwn
 urtwn-rtl8188eufw.c		optional urtwn-rtl8188eufw | urtwnfw	\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8188eufw.fw:urtwn-rtl8188eufw:111 -murtwn-rtl8188eufw -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"urtwn-rtl8188eufw.c"
 urtwn-rtl8188eufw.fwo		optional urtwn-rtl8188eufw | urtwnfw	\
 	dependency	"urtwn-rtl8188eufw.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"urtwn-rtl8188eufw.fwo"
 urtwn-rtl8188eufw.fw		optional urtwn-rtl8188eufw | urtwnfw	\
 	dependency	"$S/contrib/dev/urtwn/urtwn-rtl8188eufw.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"urtwn-rtl8188eufw.fw"
 urtwn-rtl8192cfwT.c		optional urtwn-rtl8192cfwT | urtwnfw	\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8192cfwT.fw:urtwn-rtl8192cfwT:111 -murtwn-rtl8192cfwT -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"urtwn-rtl8192cfwT.c"
 urtwn-rtl8192cfwT.fwo		optional urtwn-rtl8192cfwT | urtwnfw	\
 	dependency	"urtwn-rtl8192cfwT.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"urtwn-rtl8192cfwT.fwo"
 urtwn-rtl8192cfwT.fw		optional urtwn-rtl8192cfwT | urtwnfw	\
 	dependency	"$S/contrib/dev/urtwn/urtwn-rtl8192cfwT.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"urtwn-rtl8192cfwT.fw"
 urtwn-rtl8192cfwU.c		optional urtwn-rtl8192cfwU | urtwnfw	\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk urtwn-rtl8192cfwU.fw:urtwn-rtl8192cfwU:111 -murtwn-rtl8192cfwU -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"urtwn-rtl8192cfwU.c"
 urtwn-rtl8192cfwU.fwo		optional urtwn-rtl8192cfwU | urtwnfw	\
 	dependency	"urtwn-rtl8192cfwU.fw"				\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"urtwn-rtl8192cfwU.fwo"
 urtwn-rtl8192cfwU.fw		optional urtwn-rtl8192cfwU | urtwnfw	\
 	dependency	"$S/contrib/dev/urtwn/urtwn-rtl8192cfwU.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"urtwn-rtl8192cfwU.fw"
 
 dev/usb/wlan/if_zyd.c		optional zyd
 #
 # USB serial and parallel port drivers
 #
 dev/usb/serial/u3g.c		optional u3g
 dev/usb/serial/uark.c		optional uark
 dev/usb/serial/ubsa.c		optional ubsa
 dev/usb/serial/ubser.c		optional ubser
 dev/usb/serial/uchcom.c		optional uchcom
 dev/usb/serial/ucycom.c		optional ucycom
 dev/usb/serial/ufoma.c		optional ufoma
 dev/usb/serial/uftdi.c		optional uftdi
 dev/usb/serial/ugensa.c		optional ugensa
 dev/usb/serial/uipaq.c		optional uipaq
 dev/usb/serial/ulpt.c		optional ulpt
 dev/usb/serial/umcs.c		optional umcs
 dev/usb/serial/umct.c		optional umct
 dev/usb/serial/umodem.c		optional umodem
 dev/usb/serial/umoscom.c	optional umoscom
 dev/usb/serial/uplcom.c		optional uplcom
 dev/usb/serial/uslcom.c		optional uslcom
 dev/usb/serial/uvisor.c		optional uvisor
 dev/usb/serial/uvscom.c		optional uvscom
 dev/usb/serial/usb_serial.c 	optional ucom | u3g | uark | ubsa | ubser | \
 					 uchcom | ucycom | ufoma | uftdi | \
 					 ugensa | uipaq | umcs | umct | \
 					 umodem | umoscom | uplcom | usie | \
 					 uslcom | uvisor | uvscom
 #
 # USB misc drivers
 #
 dev/usb/misc/ufm.c		optional ufm
 dev/usb/misc/udbp.c		optional udbp
 dev/usb/misc/ugold.c		optional ugold
 dev/usb/misc/uled.c		optional uled
 #
 # USB input drivers
 #
 dev/usb/input/atp.c		optional atp
 dev/usb/input/uep.c		optional uep
 dev/usb/input/uhid.c		optional uhid
 dev/usb/input/ukbd.c		optional ukbd
 dev/usb/input/ums.c		optional ums
 dev/usb/input/wsp.c		optional wsp
 #
 # USB quirks
 #
 dev/usb/quirk/usb_quirk.c	optional usb
 #
 # USB templates
 #
 dev/usb/template/usb_template.c		optional usb_template
 dev/usb/template/usb_template_audio.c	optional usb_template
 dev/usb/template/usb_template_cdce.c	optional usb_template
 dev/usb/template/usb_template_kbd.c	optional usb_template
 dev/usb/template/usb_template_modem.c	optional usb_template
 dev/usb/template/usb_template_mouse.c	optional usb_template
 dev/usb/template/usb_template_msc.c	optional usb_template
 dev/usb/template/usb_template_mtp.c	optional usb_template
 dev/usb/template/usb_template_phone.c	optional usb_template
 dev/usb/template/usb_template_serialnet.c	optional usb_template
 dev/usb/template/usb_template_midi.c	optional usb_template
 #
 # USB video drivers
 #
 dev/usb/video/udl.c			optional udl
 #
 # USB END
 #
 dev/videomode/videomode.c		optional videomode
 dev/videomode/edid.c			optional videomode
 dev/videomode/pickmode.c		optional videomode
 dev/videomode/vesagtf.c			optional videomode
 dev/utopia/idtphy.c		optional utopia
 dev/utopia/suni.c		optional utopia
 dev/utopia/utopia.c		optional utopia
 dev/vge/if_vge.c		optional vge
 dev/viapm/viapm.c		optional viapm pci
 dev/virtio/virtio.c			optional	virtio
 dev/virtio/virtqueue.c			optional	virtio
 dev/virtio/virtio_bus_if.m		optional	virtio
 dev/virtio/virtio_if.m			optional	virtio
 dev/virtio/pci/virtio_pci.c		optional	virtio_pci
 dev/virtio/mmio/virtio_mmio.c		optional	virtio_mmio
 dev/virtio/mmio/virtio_mmio_if.m	optional	virtio_mmio
 dev/virtio/network/if_vtnet.c		optional	vtnet
 dev/virtio/block/virtio_blk.c		optional	virtio_blk
 dev/virtio/balloon/virtio_balloon.c	optional	virtio_balloon
 dev/virtio/scsi/virtio_scsi.c		optional	virtio_scsi
 dev/virtio/random/virtio_random.c	optional	virtio_random
 dev/virtio/console/virtio_console.c	optional	virtio_console
 dev/vkbd/vkbd.c			optional vkbd
 dev/vr/if_vr.c			optional vr pci
 dev/vt/colors/vt_termcolors.c	optional vt
 dev/vt/font/vt_font_default.c	optional vt
 dev/vt/font/vt_mouse_cursor.c	optional vt
 dev/vt/hw/efifb/efifb.c		optional vt_efifb
 dev/vt/hw/fb/vt_fb.c		optional vt
 dev/vt/hw/vga/vt_vga.c		optional vt vt_vga
 dev/vt/logo/logo_freebsd.c	optional vt splash
 dev/vt/logo/logo_beastie.c	optional vt splash
 dev/vt/vt_buf.c			optional vt
 dev/vt/vt_consolectl.c		optional vt
 dev/vt/vt_core.c		optional vt
 dev/vt/vt_cpulogos.c		optional vt splash
 dev/vt/vt_font.c		optional vt
 dev/vt/vt_sysmouse.c		optional vt
 dev/vte/if_vte.c		optional vte pci
 dev/vx/if_vx.c			optional vx
 dev/vx/if_vx_eisa.c		optional vx eisa
 dev/vx/if_vx_pci.c		optional vx pci
 dev/vxge/vxge.c				optional vxge
 dev/vxge/vxgehal/vxgehal-ifmsg.c	optional vxge
 dev/vxge/vxgehal/vxgehal-mrpcim.c	optional vxge
 dev/vxge/vxgehal/vxge-queue.c		optional vxge
 dev/vxge/vxgehal/vxgehal-ring.c		optional vxge
 dev/vxge/vxgehal/vxgehal-swapper.c	optional vxge
 dev/vxge/vxgehal/vxgehal-mgmt.c		optional vxge
 dev/vxge/vxgehal/vxgehal-srpcim.c	optional vxge
 dev/vxge/vxgehal/vxgehal-config.c	optional vxge
 dev/vxge/vxgehal/vxgehal-blockpool.c	optional vxge
 dev/vxge/vxgehal/vxgehal-doorbells.c	optional vxge
 dev/vxge/vxgehal/vxgehal-mgmtaux.c	optional vxge
 dev/vxge/vxgehal/vxgehal-device.c	optional vxge
 dev/vxge/vxgehal/vxgehal-mm.c		optional vxge
 dev/vxge/vxgehal/vxgehal-driver.c	optional vxge
 dev/vxge/vxgehal/vxgehal-virtualpath.c	optional vxge
 dev/vxge/vxgehal/vxgehal-channel.c	optional vxge
 dev/vxge/vxgehal/vxgehal-fifo.c		optional vxge
 dev/watchdog/watchdog.c		standard
 dev/wb/if_wb.c			optional wb pci
 dev/wds/wd7000.c		optional wds isa
 dev/wi/if_wi.c			optional wi
 dev/wi/if_wi_pccard.c		optional wi pccard
 dev/wi/if_wi_pci.c		optional wi pci
 dev/wl/if_wl.c			optional wl isa
 dev/wpi/if_wpi.c		optional wpi pci
 wpifw.c			optional wpifw					\
 	compile-with	"${AWK} -f $S/tools/fw_stub.awk wpi.fw:wpifw:153229 -mwpi -c${.TARGET}" \
 	no-implicit-rule before-depend local				\
 	clean		"wpifw.c"
 wpifw.fwo			optional wpifw				\
 	dependency	"wpi.fw"					\
 	compile-with	"${NORMAL_FWO}"					\
 	no-implicit-rule						\
 	clean		"wpifw.fwo"
 wpi.fw			optional wpifw					\
 	dependency	"$S/contrib/dev/wpi/iwlwifi-3945-15.32.2.9.fw.uu"	\
 	compile-with	"${NORMAL_FW}"					\
 	no-obj no-implicit-rule						\
 	clean		"wpi.fw"
 dev/xe/if_xe.c			optional xe
 dev/xe/if_xe_pccard.c		optional xe pccard
 dev/xen/balloon/balloon.c	optional xenhvm
 dev/xen/blkfront/blkfront.c	optional xenhvm
 dev/xen/blkback/blkback.c	optional xenhvm
 dev/xen/console/xen_console.c	optional xenhvm
 dev/xen/control/control.c	optional xenhvm
 dev/xen/grant_table/grant_table.c	optional xenhvm
 dev/xen/netback/netback.c	optional xenhvm
 dev/xen/netfront/netfront.c	optional xenhvm
 dev/xen/xenpci/xenpci.c		optional xenpci
 dev/xen/timer/timer.c		optional xenhvm
 dev/xen/pvcpu/pvcpu.c		optional xenhvm
 dev/xen/xenstore/xenstore.c	optional xenhvm
 dev/xen/xenstore/xenstore_dev.c	optional xenhvm
 dev/xen/xenstore/xenstored_dev.c	optional xenhvm
 dev/xen/evtchn/evtchn_dev.c	optional xenhvm
 dev/xen/privcmd/privcmd.c	optional xenhvm
 dev/xen/debug/debug.c		optional xenhvm
 dev/xl/if_xl.c			optional xl pci
 dev/xl/xlphy.c			optional xl pci
 fs/autofs/autofs.c		optional autofs
 fs/autofs/autofs_vfsops.c	optional autofs
 fs/autofs/autofs_vnops.c	optional autofs
 fs/deadfs/dead_vnops.c		standard
 fs/devfs/devfs_devs.c		standard
 fs/devfs/devfs_dir.c		standard
 fs/devfs/devfs_rule.c		standard
 fs/devfs/devfs_vfsops.c		standard
 fs/devfs/devfs_vnops.c		standard
 fs/fdescfs/fdesc_vfsops.c	optional fdescfs
 fs/fdescfs/fdesc_vnops.c	optional fdescfs
 fs/fifofs/fifo_vnops.c		standard
 fs/cuse/cuse.c			optional cuse
 fs/fuse/fuse_device.c		optional fuse
 fs/fuse/fuse_file.c		optional fuse
 fs/fuse/fuse_internal.c		optional fuse
 fs/fuse/fuse_io.c		optional fuse
 fs/fuse/fuse_ipc.c		optional fuse
 fs/fuse/fuse_main.c		optional fuse
 fs/fuse/fuse_node.c		optional fuse
 fs/fuse/fuse_vfsops.c		optional fuse
 fs/fuse/fuse_vnops.c		optional fuse
 fs/msdosfs/msdosfs_conv.c	optional msdosfs
 fs/msdosfs/msdosfs_denode.c	optional msdosfs
 fs/msdosfs/msdosfs_fat.c	optional msdosfs
 fs/msdosfs/msdosfs_fileno.c	optional msdosfs
 fs/msdosfs/msdosfs_iconv.c	optional msdosfs_iconv
 fs/msdosfs/msdosfs_lookup.c	optional msdosfs
 fs/msdosfs/msdosfs_vfsops.c	optional msdosfs
 fs/msdosfs/msdosfs_vnops.c	optional msdosfs
 fs/nandfs/bmap.c		optional nandfs
 fs/nandfs/nandfs_alloc.c	optional nandfs
 fs/nandfs/nandfs_bmap.c		optional nandfs
 fs/nandfs/nandfs_buffer.c	optional nandfs
 fs/nandfs/nandfs_cleaner.c	optional nandfs
 fs/nandfs/nandfs_cpfile.c	optional nandfs
 fs/nandfs/nandfs_dat.c		optional nandfs
 fs/nandfs/nandfs_dir.c		optional nandfs
 fs/nandfs/nandfs_ifile.c	optional nandfs
 fs/nandfs/nandfs_segment.c	optional nandfs
 fs/nandfs/nandfs_subr.c		optional nandfs
 fs/nandfs/nandfs_sufile.c	optional nandfs
 fs/nandfs/nandfs_vfsops.c	optional nandfs
 fs/nandfs/nandfs_vnops.c	optional nandfs
 fs/nfs/nfs_commonkrpc.c		optional nfscl | nfsd
 fs/nfs/nfs_commonsubs.c		optional nfscl | nfsd
 fs/nfs/nfs_commonport.c		optional nfscl | nfsd
 fs/nfs/nfs_commonacl.c		optional nfscl | nfsd
 fs/nfsclient/nfs_clcomsubs.c	optional nfscl
 fs/nfsclient/nfs_clsubs.c	optional nfscl
 fs/nfsclient/nfs_clstate.c	optional nfscl
 fs/nfsclient/nfs_clkrpc.c	optional nfscl
 fs/nfsclient/nfs_clrpcops.c	optional nfscl
 fs/nfsclient/nfs_clvnops.c	optional nfscl
 fs/nfsclient/nfs_clnode.c	optional nfscl
 fs/nfsclient/nfs_clvfsops.c	optional nfscl
 fs/nfsclient/nfs_clport.c	optional nfscl
 fs/nfsclient/nfs_clbio.c	optional nfscl
 fs/nfsclient/nfs_clnfsiod.c	optional nfscl
 fs/nfsserver/nfs_fha_new.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdsocket.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdsubs.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdstate.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdkrpc.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdserv.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdport.c	optional nfsd inet
 fs/nfsserver/nfs_nfsdcache.c	optional nfsd inet
 fs/nullfs/null_subr.c		optional nullfs
 fs/nullfs/null_vfsops.c		optional nullfs
 fs/nullfs/null_vnops.c		optional nullfs
 fs/procfs/procfs.c		optional procfs
 fs/procfs/procfs_ctl.c		optional procfs
 fs/procfs/procfs_dbregs.c	optional procfs
 fs/procfs/procfs_fpregs.c	optional procfs
 fs/procfs/procfs_ioctl.c	optional procfs
 fs/procfs/procfs_map.c		optional procfs
 fs/procfs/procfs_mem.c		optional procfs
 fs/procfs/procfs_note.c		optional procfs
 fs/procfs/procfs_osrel.c	optional procfs
 fs/procfs/procfs_regs.c		optional procfs
 fs/procfs/procfs_rlimit.c	optional procfs
 fs/procfs/procfs_status.c	optional procfs
 fs/procfs/procfs_type.c		optional procfs
 fs/pseudofs/pseudofs.c		optional pseudofs
 fs/pseudofs/pseudofs_fileno.c	optional pseudofs
 fs/pseudofs/pseudofs_vncache.c	optional pseudofs
 fs/pseudofs/pseudofs_vnops.c	optional pseudofs
 fs/smbfs/smbfs_io.c		optional smbfs
 fs/smbfs/smbfs_node.c		optional smbfs
 fs/smbfs/smbfs_smb.c		optional smbfs
 fs/smbfs/smbfs_subr.c		optional smbfs
 fs/smbfs/smbfs_vfsops.c		optional smbfs
 fs/smbfs/smbfs_vnops.c		optional smbfs
 fs/udf/osta.c			optional udf
 fs/udf/udf_iconv.c		optional udf_iconv
 fs/udf/udf_vfsops.c		optional udf
 fs/udf/udf_vnops.c		optional udf
 fs/unionfs/union_subr.c		optional unionfs
 fs/unionfs/union_vfsops.c	optional unionfs
 fs/unionfs/union_vnops.c	optional unionfs
 fs/tmpfs/tmpfs_vnops.c		optional tmpfs
 fs/tmpfs/tmpfs_fifoops.c 	optional tmpfs
 fs/tmpfs/tmpfs_vfsops.c 	optional tmpfs
 fs/tmpfs/tmpfs_subr.c 		optional tmpfs
 gdb/gdb_cons.c			optional gdb
 gdb/gdb_main.c			optional gdb
 gdb/gdb_packet.c		optional gdb
 geom/bde/g_bde.c		optional geom_bde
 geom/bde/g_bde_crypt.c		optional geom_bde
 geom/bde/g_bde_lock.c		optional geom_bde
 geom/bde/g_bde_work.c		optional geom_bde
 geom/cache/g_cache.c		optional geom_cache
 geom/concat/g_concat.c		optional geom_concat
 geom/eli/g_eli.c		optional geom_eli
 geom/eli/g_eli_crypto.c		optional geom_eli
 geom/eli/g_eli_ctl.c		optional geom_eli
+geom/eli/g_eli_hmac.c		optional geom_eli
 geom/eli/g_eli_integrity.c	optional geom_eli
 geom/eli/g_eli_key.c		optional geom_eli
 geom/eli/g_eli_key_cache.c	optional geom_eli
 geom/eli/g_eli_privacy.c	optional geom_eli
 geom/eli/pkcs5v2.c		optional geom_eli
 geom/gate/g_gate.c		optional geom_gate
 geom/geom_aes.c			optional geom_aes
 geom/geom_bsd.c			optional geom_bsd
 geom/geom_bsd_enc.c		optional geom_bsd | geom_part_bsd
 geom/geom_ccd.c			optional ccd | geom_ccd
 geom/geom_ctl.c			standard
 geom/geom_dev.c			standard
 geom/geom_disk.c		standard
 geom/geom_dump.c		standard
 geom/geom_event.c		standard
 geom/geom_fox.c			optional geom_fox
 geom/geom_flashmap.c		optional fdt cfi | fdt nand
 geom/geom_io.c			standard
 geom/geom_kern.c		standard
 geom/geom_map.c			optional geom_map
 geom/geom_mbr.c			optional geom_mbr
 geom/geom_mbr_enc.c		optional geom_mbr
 geom/geom_pc98.c		optional geom_pc98
 geom/geom_pc98_enc.c		optional geom_pc98
 geom/geom_redboot.c		optional geom_redboot
 geom/geom_slice.c		standard
 geom/geom_subr.c		standard
 geom/geom_sunlabel.c		optional geom_sunlabel
 geom/geom_sunlabel_enc.c	optional geom_sunlabel
 geom/geom_vfs.c			standard
 geom/geom_vol_ffs.c		optional geom_vol
 geom/journal/g_journal.c	optional geom_journal
 geom/journal/g_journal_ufs.c	optional geom_journal
 geom/label/g_label.c		optional geom_label | geom_label_gpt
 geom/label/g_label_ext2fs.c	optional geom_label
 geom/label/g_label_iso9660.c	optional geom_label
 geom/label/g_label_msdosfs.c	optional geom_label
 geom/label/g_label_ntfs.c	optional geom_label
 geom/label/g_label_reiserfs.c	optional geom_label
 geom/label/g_label_ufs.c	optional geom_label
 geom/label/g_label_gpt.c	optional geom_label | geom_label_gpt
 geom/label/g_label_disk_ident.c	optional geom_label
 geom/linux_lvm/g_linux_lvm.c	optional geom_linux_lvm
 geom/mirror/g_mirror.c		optional geom_mirror
 geom/mirror/g_mirror_ctl.c	optional geom_mirror
 geom/mountver/g_mountver.c	optional geom_mountver
 geom/multipath/g_multipath.c	optional geom_multipath
 geom/nop/g_nop.c		optional geom_nop
 geom/part/g_part.c		standard
 geom/part/g_part_if.m		standard
 geom/part/g_part_apm.c		optional geom_part_apm
 geom/part/g_part_bsd.c		optional geom_part_bsd
 geom/part/g_part_bsd64.c	optional geom_part_bsd64
 geom/part/g_part_ebr.c		optional geom_part_ebr
 geom/part/g_part_gpt.c		optional geom_part_gpt
 geom/part/g_part_ldm.c		optional geom_part_ldm
 geom/part/g_part_mbr.c		optional geom_part_mbr
 geom/part/g_part_pc98.c		optional geom_part_pc98
 geom/part/g_part_vtoc8.c	optional geom_part_vtoc8
 geom/raid/g_raid.c		optional geom_raid
 geom/raid/g_raid_ctl.c		optional geom_raid
 geom/raid/g_raid_md_if.m	optional geom_raid
 geom/raid/g_raid_tr_if.m	optional geom_raid
 geom/raid/md_ddf.c		optional geom_raid
 geom/raid/md_intel.c		optional geom_raid
 geom/raid/md_jmicron.c		optional geom_raid
 geom/raid/md_nvidia.c		optional geom_raid
 geom/raid/md_promise.c		optional geom_raid
 geom/raid/md_sii.c		optional geom_raid
 geom/raid/tr_concat.c		optional geom_raid
 geom/raid/tr_raid0.c		optional geom_raid
 geom/raid/tr_raid1.c		optional geom_raid
 geom/raid/tr_raid1e.c		optional geom_raid
 geom/raid/tr_raid5.c		optional geom_raid
 geom/raid3/g_raid3.c		optional geom_raid3
 geom/raid3/g_raid3_ctl.c	optional geom_raid3
 geom/shsec/g_shsec.c		optional geom_shsec
 geom/stripe/g_stripe.c		optional geom_stripe
 geom/uncompress/g_uncompress.c	optional geom_uncompress
 contrib/xz-embedded/freebsd/xz_malloc.c	\
 	optional xz_embedded | geom_uncompress \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
 contrib/xz-embedded/linux/lib/xz/xz_crc32.c \
 	optional xz_embedded | geom_uncompress \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
 contrib/xz-embedded/linux/lib/xz/xz_dec_bcj.c \
 	optional xz_embedded | geom_uncompress \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
 contrib/xz-embedded/linux/lib/xz/xz_dec_lzma2.c \
 	optional xz_embedded | geom_uncompress \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
 contrib/xz-embedded/linux/lib/xz/xz_dec_stream.c \
 	optional xz_embedded | geom_uncompress \
 	compile-with "${NORMAL_C} -I$S/contrib/xz-embedded/freebsd/ -I$S/contrib/xz-embedded/linux/lib/xz/ -I$S/contrib/xz-embedded/linux/include/linux/"
 geom/uzip/g_uzip.c		optional geom_uzip
 geom/vinum/geom_vinum.c		optional geom_vinum
 geom/vinum/geom_vinum_create.c	optional geom_vinum
 geom/vinum/geom_vinum_drive.c	optional geom_vinum
 geom/vinum/geom_vinum_plex.c	optional geom_vinum
 geom/vinum/geom_vinum_volume.c	optional geom_vinum
 geom/vinum/geom_vinum_subr.c	optional geom_vinum
 geom/vinum/geom_vinum_raid5.c	optional geom_vinum
 geom/vinum/geom_vinum_share.c	optional geom_vinum
 geom/vinum/geom_vinum_list.c	optional geom_vinum
 geom/vinum/geom_vinum_rm.c	optional geom_vinum
 geom/vinum/geom_vinum_init.c	optional geom_vinum
 geom/vinum/geom_vinum_state.c	optional geom_vinum
 geom/vinum/geom_vinum_rename.c	optional geom_vinum
 geom/vinum/geom_vinum_move.c	optional geom_vinum
 geom/vinum/geom_vinum_events.c	optional geom_vinum
 geom/virstor/binstream.c	optional geom_virstor
 geom/virstor/g_virstor.c	optional geom_virstor
 geom/virstor/g_virstor_md.c	optional geom_virstor
 geom/zero/g_zero.c		optional geom_zero
 fs/ext2fs/ext2_alloc.c		optional ext2fs
 fs/ext2fs/ext2_balloc.c		optional ext2fs
 fs/ext2fs/ext2_bmap.c		optional ext2fs
 fs/ext2fs/ext2_extents.c	optional ext2fs
 fs/ext2fs/ext2_inode.c		optional ext2fs
 fs/ext2fs/ext2_inode_cnv.c	optional ext2fs
 fs/ext2fs/ext2_lookup.c		optional ext2fs
 fs/ext2fs/ext2_subr.c		optional ext2fs
 fs/ext2fs/ext2_vfsops.c		optional ext2fs
 fs/ext2fs/ext2_vnops.c		optional ext2fs
 gnu/fs/reiserfs/reiserfs_hashes.c	optional reiserfs \
 	warning "kernel contains GPL contaminated ReiserFS filesystem"
 gnu/fs/reiserfs/reiserfs_inode.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_item_ops.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_namei.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_prints.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_stree.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_vfsops.c	optional reiserfs
 gnu/fs/reiserfs/reiserfs_vnops.c	optional reiserfs
 #
 isa/isa_if.m			standard
 isa/isa_common.c		optional isa
 isa/isahint.c			optional isa
 isa/pnp.c			optional isa isapnp
 isa/pnpparse.c			optional isa isapnp
 fs/cd9660/cd9660_bmap.c	optional cd9660
 fs/cd9660/cd9660_lookup.c	optional cd9660
 fs/cd9660/cd9660_node.c	optional cd9660
 fs/cd9660/cd9660_rrip.c	optional cd9660
 fs/cd9660/cd9660_util.c	optional cd9660
 fs/cd9660/cd9660_vfsops.c	optional cd9660
 fs/cd9660/cd9660_vnops.c	optional cd9660
 fs/cd9660/cd9660_iconv.c	optional cd9660_iconv
 kern/bus_if.m			standard
 kern/clock_if.m			standard
 kern/cpufreq_if.m		standard
 kern/device_if.m		standard
 kern/imgact_binmisc.c		optional	imagact_binmisc
 kern/imgact_elf.c		standard
 kern/imgact_elf32.c		optional compat_freebsd32
 kern/imgact_shell.c		standard
 kern/inflate.c			optional gzip
 kern/init_main.c		standard
 kern/init_sysent.c		standard
 kern/ksched.c			optional _kposix_priority_scheduling
 kern/kern_acct.c		standard
 kern/kern_alq.c			optional alq
 kern/kern_clock.c		standard
 kern/kern_condvar.c		standard
 kern/kern_conf.c		standard
 kern/kern_cons.c		standard
 kern/kern_cpu.c			standard
 kern/kern_cpuset.c		standard
 kern/kern_context.c		standard
 kern/kern_descrip.c		standard
 kern/kern_dtrace.c		optional kdtrace_hooks
 kern/kern_dump.c		standard
 kern/kern_environment.c		standard
 kern/kern_et.c			standard
 kern/kern_event.c		standard
 kern/kern_exec.c		standard
 kern/kern_exit.c		standard
 kern/kern_fail.c		standard
 kern/kern_ffclock.c		standard
 kern/kern_fork.c		standard
 kern/kern_gzio.c		optional gzio
 kern/kern_hhook.c		standard
 kern/kern_idle.c		standard
 kern/kern_intr.c		standard
 kern/kern_jail.c		standard
 kern/kern_khelp.c		standard
 kern/kern_kthread.c		standard
 kern/kern_ktr.c			optional ktr
 kern/kern_ktrace.c		standard
 kern/kern_linker.c		standard
 kern/kern_lock.c		standard
 kern/kern_lockf.c		standard
 kern/kern_lockstat.c		optional kdtrace_hooks
 kern/kern_loginclass.c		standard
 kern/kern_malloc.c		standard
 kern/kern_mbuf.c		standard
 kern/kern_mib.c			standard
 kern/kern_module.c		standard
 kern/kern_mtxpool.c		standard
 kern/kern_mutex.c		standard
 kern/kern_ntptime.c		standard
 kern/kern_numa.c		standard
 kern/kern_osd.c			standard
 kern/kern_physio.c		standard
 kern/kern_pmc.c			standard
 kern/kern_poll.c		optional device_polling
 kern/kern_priv.c		standard
 kern/kern_proc.c		standard
 kern/kern_procctl.c		standard
 kern/kern_prot.c		standard
 kern/kern_racct.c		standard
 kern/kern_rangelock.c		standard
 kern/kern_rctl.c		standard
 kern/kern_resource.c		standard
 kern/kern_rmlock.c		standard
 kern/kern_rwlock.c		standard
 kern/kern_sdt.c			optional kdtrace_hooks
 kern/kern_sema.c		standard
 kern/kern_sharedpage.c		standard
 kern/kern_shutdown.c		standard
 kern/kern_sig.c			standard
 kern/kern_switch.c		standard
 kern/kern_sx.c			standard
 kern/kern_synch.c		standard
 kern/kern_syscalls.c		standard
 kern/kern_sysctl.c		standard
 kern/kern_tc.c			standard
 kern/kern_thr.c			standard
 kern/kern_thread.c		standard
 kern/kern_time.c		standard
 kern/kern_timeout.c		standard
 kern/kern_umtx.c		standard
 kern/kern_uuid.c		standard
 kern/kern_xxx.c			standard
 kern/link_elf.c			standard
 kern/linker_if.m		standard
 kern/md4c.c			optional netsmb
 kern/md5c.c			standard
 kern/p1003_1b.c			standard
 kern/posix4_mib.c		standard
 kern/sched_4bsd.c		optional sched_4bsd
 kern/sched_ule.c		optional sched_ule
 kern/serdev_if.m		standard
 kern/stack_protector.c		standard \
 	compile-with "${NORMAL_C:N-fstack-protector*}"
 kern/subr_acl_nfs4.c		optional ufs_acl | zfs
 kern/subr_acl_posix1e.c		optional ufs_acl
 kern/subr_autoconf.c		standard
 kern/subr_blist.c		standard
 kern/subr_bus.c			standard
 kern/subr_bus_dma.c		standard
 kern/subr_bufring.c		standard
 kern/subr_capability.c		standard
 kern/subr_clock.c		standard
 kern/subr_counter.c		standard
 kern/subr_devstat.c		standard
 kern/subr_disk.c		standard
 kern/subr_eventhandler.c	standard
 kern/subr_fattime.c		standard
 kern/subr_firmware.c		optional firmware
 kern/subr_hash.c		standard
 kern/subr_hints.c		standard
 kern/subr_kdb.c			standard
 kern/subr_kobj.c		standard
 kern/subr_lock.c		standard
 kern/subr_log.c			standard
 kern/subr_mbpool.c		optional libmbpool
 kern/subr_mchain.c		optional libmchain
 kern/subr_module.c		standard
 kern/subr_msgbuf.c		standard
 kern/subr_param.c		standard
 kern/subr_pcpu.c		standard
 kern/subr_pctrie.c		standard
 kern/subr_power.c		standard
 kern/subr_prf.c			standard
 kern/subr_prof.c		standard
 kern/subr_rman.c		standard
 kern/subr_rtc.c			standard
 kern/subr_sbuf.c		standard
 kern/subr_scanf.c		standard
 kern/subr_sglist.c		standard
 kern/subr_sleepqueue.c		standard
 kern/subr_smp.c			standard
 kern/subr_stack.c		optional ddb | stack | ktr
 kern/subr_taskqueue.c		standard
 kern/subr_terminal.c		optional vt
 kern/subr_trap.c		standard
 kern/subr_turnstile.c		standard
 kern/subr_uio.c			standard
 kern/subr_unit.c		standard
 kern/subr_vmem.c		standard
 kern/subr_witness.c		optional witness
 kern/sys_capability.c		standard
 kern/sys_generic.c		standard
 kern/sys_pipe.c			standard
 kern/sys_procdesc.c		standard
 kern/sys_process.c		standard
 kern/sys_socket.c		standard
 kern/syscalls.c			standard
 kern/sysv_ipc.c			standard
 kern/sysv_msg.c			optional sysvmsg
 kern/sysv_sem.c			optional sysvsem
 kern/sysv_shm.c			optional sysvshm
 kern/tty.c			standard
 kern/tty_compat.c		optional compat_43tty
 kern/tty_info.c			standard
 kern/tty_inq.c			standard
 kern/tty_outq.c			standard
 kern/tty_pts.c			standard
 kern/tty_tty.c			standard
 kern/tty_ttydisc.c		standard
 kern/uipc_accf.c		standard
 kern/uipc_debug.c		optional ddb
 kern/uipc_domain.c		standard
 kern/uipc_mbuf.c		standard
 kern/uipc_mbuf2.c		standard
 kern/uipc_mbufhash.c		standard
 kern/uipc_mqueue.c		optional p1003_1b_mqueue
 kern/uipc_sem.c			optional p1003_1b_semaphores
 kern/uipc_shm.c			standard
 kern/uipc_sockbuf.c		standard
 kern/uipc_socket.c		standard
 kern/uipc_syscalls.c		standard
 kern/uipc_usrreq.c		standard
 kern/vfs_acl.c			standard
 kern/vfs_aio.c			optional vfs_aio
 kern/vfs_bio.c			standard
 kern/vfs_cache.c		standard
 kern/vfs_cluster.c		standard
 kern/vfs_default.c		standard
 kern/vfs_export.c		standard
 kern/vfs_extattr.c		standard
 kern/vfs_hash.c			standard
 kern/vfs_init.c			standard
 kern/vfs_lookup.c		standard
 kern/vfs_mount.c		standard
 kern/vfs_mountroot.c		standard
 kern/vfs_subr.c			standard
 kern/vfs_syscalls.c		standard
 kern/vfs_vnops.c		standard
 #
 # Kernel GSS-API
 #
 gssd.h				optional kgssapi			\
 	dependency		"$S/kgssapi/gssd.x"			\
 	compile-with		"RPCGEN_CPP='${CPP}' rpcgen -hM $S/kgssapi/gssd.x | grep -v pthread.h > gssd.h" \
 	no-obj no-implicit-rule before-depend local			\
 	clean			"gssd.h"
 gssd_xdr.c			optional kgssapi			\
 	dependency		"$S/kgssapi/gssd.x gssd.h"		\
 	compile-with		"RPCGEN_CPP='${CPP}' rpcgen -c $S/kgssapi/gssd.x -o gssd_xdr.c" \
 	no-implicit-rule before-depend local				\
 	clean			"gssd_xdr.c"
 gssd_clnt.c			optional kgssapi			\
 	dependency		"$S/kgssapi/gssd.x gssd.h"		\
 	compile-with		"RPCGEN_CPP='${CPP}' rpcgen -lM $S/kgssapi/gssd.x | grep -v string.h > gssd_clnt.c" \
 	no-implicit-rule before-depend local				\
 	clean			"gssd_clnt.c"
 kgssapi/gss_accept_sec_context.c optional kgssapi
 kgssapi/gss_add_oid_set_member.c optional kgssapi
 kgssapi/gss_acquire_cred.c	optional kgssapi
 kgssapi/gss_canonicalize_name.c	optional kgssapi
 kgssapi/gss_create_empty_oid_set.c optional kgssapi
 kgssapi/gss_delete_sec_context.c optional kgssapi
 kgssapi/gss_display_status.c	optional kgssapi
 kgssapi/gss_export_name.c	optional kgssapi
 kgssapi/gss_get_mic.c		optional kgssapi
 kgssapi/gss_init_sec_context.c	optional kgssapi
 kgssapi/gss_impl.c		optional kgssapi
 kgssapi/gss_import_name.c	optional kgssapi
 kgssapi/gss_names.c		optional kgssapi
 kgssapi/gss_pname_to_uid.c	optional kgssapi
 kgssapi/gss_release_buffer.c	optional kgssapi
 kgssapi/gss_release_cred.c	optional kgssapi
 kgssapi/gss_release_name.c	optional kgssapi
 kgssapi/gss_release_oid_set.c	optional kgssapi
 kgssapi/gss_set_cred_option.c	optional kgssapi
 kgssapi/gss_test_oid_set_member.c optional kgssapi
 kgssapi/gss_unwrap.c		optional kgssapi
 kgssapi/gss_verify_mic.c	optional kgssapi
 kgssapi/gss_wrap.c		optional kgssapi
 kgssapi/gss_wrap_size_limit.c	optional kgssapi
 kgssapi/gssd_prot.c		optional kgssapi
 kgssapi/krb5/krb5_mech.c	optional kgssapi
 kgssapi/krb5/kcrypto.c		optional kgssapi
 kgssapi/krb5/kcrypto_aes.c	optional kgssapi
 kgssapi/krb5/kcrypto_arcfour.c	optional kgssapi
 kgssapi/krb5/kcrypto_des.c	optional kgssapi
 kgssapi/krb5/kcrypto_des3.c	optional kgssapi
 kgssapi/kgss_if.m		optional kgssapi
 kgssapi/gsstest.c		optional kgssapi_debug
 # These files in libkern/ are those needed by all architectures.  Some
 # of the files in libkern/ are only needed on some architectures, e.g.,
 # libkern/divdi3.c is needed by i386 but not alpha.  Also, some of these
 # routines may be optimized for a particular platform.  In either case,
 # the file should be moved to conf/files.<arch> from here.
 #
 libkern/arc4random.c		standard
 libkern/asprintf.c		standard
 libkern/bcd.c			standard
 libkern/bsearch.c		standard
 libkern/crc32.c			standard
 libkern/explicit_bzero.c	standard
 libkern/fnmatch.c		standard
 libkern/iconv.c			optional libiconv
 libkern/iconv_converter_if.m	optional libiconv
 libkern/iconv_ucs.c		optional libiconv
 libkern/iconv_xlat.c		optional libiconv
 libkern/iconv_xlat16.c		optional libiconv
 libkern/inet_aton.c		standard
 libkern/inet_ntoa.c		standard
 libkern/inet_ntop.c		standard
 libkern/inet_pton.c		standard
 libkern/jenkins_hash.c		standard
 libkern/murmur3_32.c		standard
 libkern/mcount.c		optional profiling-routine
 libkern/memcchr.c		standard
 libkern/memchr.c		standard
 libkern/memcmp.c		standard
 libkern/memmem.c		optional gdb
 libkern/qsort.c			standard
 libkern/qsort_r.c		standard
 libkern/random.c		standard
 libkern/scanc.c			standard
 libkern/strcasecmp.c		standard
 libkern/strcat.c		standard
 libkern/strchr.c		standard
 libkern/strcmp.c		standard
 libkern/strcpy.c		standard
 libkern/strcspn.c		standard
 libkern/strdup.c		standard
 libkern/strndup.c		standard
 libkern/strlcat.c		standard
 libkern/strlcpy.c		standard
 libkern/strlen.c		standard
 libkern/strncmp.c		standard
 libkern/strncpy.c		standard
 libkern/strnlen.c		standard
 libkern/strrchr.c		standard
 libkern/strsep.c		standard
 libkern/strspn.c		standard
 libkern/strstr.c		standard
 libkern/strtol.c		standard
 libkern/strtoq.c		standard
 libkern/strtoul.c		standard
 libkern/strtouq.c		standard
 libkern/strvalid.c		standard
 libkern/timingsafe_bcmp.c	standard
 libkern/zlib.c			optional crypto | geom_uzip | ipsec | \
 					 mxge | netgraph_deflate | \
 					 ddb_ctf | gzio | geom_uncompress
 net/altq/altq_cbq.c		optional altq
 net/altq/altq_cdnr.c		optional altq
 net/altq/altq_codel.c		optional altq
 net/altq/altq_hfsc.c		optional altq
 net/altq/altq_fairq.c		optional altq
 net/altq/altq_priq.c		optional altq
 net/altq/altq_red.c		optional altq
 net/altq/altq_rio.c		optional altq
 net/altq/altq_rmclass.c		optional altq
 net/altq/altq_subr.c		optional altq
 net/bpf.c			standard
 net/bpf_buffer.c		optional bpf
 net/bpf_jitter.c		optional bpf_jitter
 net/bpf_filter.c		optional bpf | netgraph_bpf
 net/bpf_zerocopy.c		optional bpf
 net/bridgestp.c			optional bridge | if_bridge
 net/flowtable.c			optional flowtable inet | flowtable inet6
 net/ieee8023ad_lacp.c		optional lagg
 net/if.c			standard
 net/if_arcsubr.c		optional arcnet
 net/if_atmsubr.c		optional atm
 net/if_bridge.c			optional bridge inet | if_bridge inet
 net/if_clone.c			standard
 net/if_dead.c			standard
 net/if_debug.c			optional ddb
 net/if_disc.c			optional disc
 net/if_edsc.c			optional edsc
 net/if_enc.c			optional enc inet | enc inet6
 net/if_epair.c			optional epair
 net/if_ethersubr.c		optional ether
 net/if_fddisubr.c		optional fddi
 net/if_fwsubr.c			optional fwip
 net/if_gif.c			optional gif inet | gif inet6 | \
 					 netgraph_gif inet | netgraph_gif inet6
 net/if_gre.c			optional gre inet | gre inet6
 net/if_iso88025subr.c		optional token
 net/if_lagg.c			optional lagg
 net/if_loop.c			optional loop
 net/if_llatbl.c			standard
 net/if_me.c			optional me inet
 net/if_media.c			standard
 net/if_mib.c			standard
 net/if_spppfr.c			optional sppp | netgraph_sppp
 net/if_spppsubr.c		optional sppp | netgraph_sppp
 net/if_stf.c			optional stf inet inet6
 net/if_tun.c			optional tun
 net/if_tap.c			optional tap
 net/if_vlan.c			optional vlan
 net/if_vxlan.c			optional vxlan inet | vxlan inet6
 net/mppcc.c			optional netgraph_mppc_compression
 net/mppcd.c			optional netgraph_mppc_compression
 net/netisr.c			standard
 net/pfil.c			optional ether | inet
 net/radix.c			standard
 net/radix_mpath.c		standard
 net/raw_cb.c			standard
 net/raw_usrreq.c		standard
 net/route.c			standard
 net/rss_config.c		optional inet rss | inet6 rss
 net/rtsock.c			standard
 net/slcompress.c		optional netgraph_vjc | sppp | \
 					 netgraph_sppp
 net/toeplitz.c			optional inet rss | inet6 rss
 net/vnet.c			optional vimage
 net80211/ieee80211.c		optional wlan
 net80211/ieee80211_acl.c	optional wlan wlan_acl
 net80211/ieee80211_action.c	optional wlan
 net80211/ieee80211_ageq.c	optional wlan
 net80211/ieee80211_adhoc.c	optional wlan \
 	compile-with "${NORMAL_C} -Wno-unused-function"
 net80211/ieee80211_ageq.c	optional wlan
 net80211/ieee80211_amrr.c	optional wlan | wlan_amrr
 net80211/ieee80211_crypto.c	optional wlan \
 	compile-with "${NORMAL_C} -Wno-unused-function"
 net80211/ieee80211_crypto_ccmp.c optional wlan wlan_ccmp
 net80211/ieee80211_crypto_none.c optional wlan
 net80211/ieee80211_crypto_tkip.c optional wlan wlan_tkip
 net80211/ieee80211_crypto_wep.c	optional wlan wlan_wep
 net80211/ieee80211_ddb.c	optional wlan ddb
 net80211/ieee80211_dfs.c	optional wlan
 net80211/ieee80211_freebsd.c	optional wlan
 net80211/ieee80211_hostap.c	optional wlan \
 	compile-with "${NORMAL_C} -Wno-unused-function"
 net80211/ieee80211_ht.c		optional wlan
 net80211/ieee80211_hwmp.c	optional wlan ieee80211_support_mesh
 net80211/ieee80211_input.c	optional wlan
 net80211/ieee80211_ioctl.c	optional wlan
 net80211/ieee80211_mesh.c	optional wlan ieee80211_support_mesh \
 	compile-with "${NORMAL_C} -Wno-unused-function"
 net80211/ieee80211_monitor.c	optional wlan
 net80211/ieee80211_node.c	optional wlan
 net80211/ieee80211_output.c	optional wlan
 net80211/ieee80211_phy.c	optional wlan
 net80211/ieee80211_power.c	optional wlan
 net80211/ieee80211_proto.c	optional wlan
 net80211/ieee80211_radiotap.c	optional wlan
 net80211/ieee80211_ratectl.c	optional wlan
 net80211/ieee80211_ratectl_none.c optional wlan
 net80211/ieee80211_regdomain.c	optional wlan
 net80211/ieee80211_rssadapt.c	optional wlan wlan_rssadapt
 net80211/ieee80211_scan.c	optional wlan
 net80211/ieee80211_scan_sta.c	optional wlan
 net80211/ieee80211_sta.c	optional wlan \
 	compile-with "${NORMAL_C} -Wno-unused-function"
 net80211/ieee80211_superg.c	optional wlan ieee80211_support_superg
 net80211/ieee80211_scan_sw.c	optional wlan
 net80211/ieee80211_tdma.c	optional wlan ieee80211_support_tdma
 net80211/ieee80211_wds.c	optional wlan
 net80211/ieee80211_xauth.c	optional wlan wlan_xauth
 net80211/ieee80211_alq.c	optional wlan ieee80211_alq
 netgraph/atm/ccatm/ng_ccatm.c	optional ngatm_ccatm \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 netgraph/atm/ng_atm.c		optional ngatm_atm
 netgraph/atm/ngatmbase.c	optional ngatm_atmbase \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 netgraph/atm/sscfu/ng_sscfu.c	optional ngatm_sscfu \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 netgraph/atm/sscop/ng_sscop.c optional ngatm_sscop \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 netgraph/atm/uni/ng_uni.c	optional ngatm_uni \
 	compile-with "${NORMAL_C} -I$S/contrib/ngatm"
 netgraph/bluetooth/common/ng_bluetooth.c optional netgraph_bluetooth
 netgraph/bluetooth/drivers/bt3c/ng_bt3c_pccard.c optional netgraph_bluetooth_bt3c
 netgraph/bluetooth/drivers/h4/ng_h4.c optional netgraph_bluetooth_h4
 netgraph/bluetooth/drivers/ubt/ng_ubt.c optional netgraph_bluetooth_ubt usb
 netgraph/bluetooth/drivers/ubtbcmfw/ubtbcmfw.c optional netgraph_bluetooth_ubtbcmfw usb
 netgraph/bluetooth/hci/ng_hci_cmds.c optional netgraph_bluetooth_hci
 netgraph/bluetooth/hci/ng_hci_evnt.c optional netgraph_bluetooth_hci
 netgraph/bluetooth/hci/ng_hci_main.c optional netgraph_bluetooth_hci
 netgraph/bluetooth/hci/ng_hci_misc.c optional netgraph_bluetooth_hci
 netgraph/bluetooth/hci/ng_hci_ulpi.c optional netgraph_bluetooth_hci
 netgraph/bluetooth/l2cap/ng_l2cap_cmds.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/l2cap/ng_l2cap_evnt.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/l2cap/ng_l2cap_llpi.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/l2cap/ng_l2cap_main.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/l2cap/ng_l2cap_misc.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/l2cap/ng_l2cap_ulpi.c optional netgraph_bluetooth_l2cap
 netgraph/bluetooth/socket/ng_btsocket.c optional netgraph_bluetooth_socket
 netgraph/bluetooth/socket/ng_btsocket_hci_raw.c	optional netgraph_bluetooth_socket
 netgraph/bluetooth/socket/ng_btsocket_l2cap.c optional netgraph_bluetooth_socket
 netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c optional netgraph_bluetooth_socket
 netgraph/bluetooth/socket/ng_btsocket_rfcomm.c optional netgraph_bluetooth_socket
 netgraph/bluetooth/socket/ng_btsocket_sco.c optional netgraph_bluetooth_socket
 netgraph/netflow/netflow.c	optional netgraph_netflow
 netgraph/netflow/netflow_v9.c	optional netgraph_netflow
 netgraph/netflow/ng_netflow.c	optional netgraph_netflow
 netgraph/ng_UI.c		optional netgraph_UI
 netgraph/ng_async.c		optional netgraph_async
 netgraph/ng_atmllc.c		optional netgraph_atmllc
 netgraph/ng_base.c		optional netgraph
 netgraph/ng_bpf.c		optional netgraph_bpf
 netgraph/ng_bridge.c		optional netgraph_bridge
 netgraph/ng_car.c		optional netgraph_car
 netgraph/ng_cisco.c		optional netgraph_cisco
 netgraph/ng_deflate.c		optional netgraph_deflate
 netgraph/ng_device.c		optional netgraph_device
 netgraph/ng_echo.c		optional netgraph_echo
 netgraph/ng_eiface.c		optional netgraph_eiface
 netgraph/ng_ether.c		optional netgraph_ether
 netgraph/ng_ether_echo.c	optional netgraph_ether_echo
 netgraph/ng_frame_relay.c	optional netgraph_frame_relay
 netgraph/ng_gif.c		optional netgraph_gif inet6 | netgraph_gif inet
 netgraph/ng_gif_demux.c		optional netgraph_gif_demux
 netgraph/ng_hole.c		optional netgraph_hole
 netgraph/ng_iface.c		optional netgraph_iface
 netgraph/ng_ip_input.c		optional netgraph_ip_input
 netgraph/ng_ipfw.c		optional netgraph_ipfw inet ipfirewall
 netgraph/ng_ksocket.c		optional netgraph_ksocket
 netgraph/ng_l2tp.c		optional netgraph_l2tp
 netgraph/ng_lmi.c		optional netgraph_lmi
 netgraph/ng_mppc.c		optional netgraph_mppc_compression | \
 					 netgraph_mppc_encryption
 netgraph/ng_nat.c		optional netgraph_nat inet libalias
 netgraph/ng_one2many.c		optional netgraph_one2many
 netgraph/ng_parse.c		optional netgraph
 netgraph/ng_patch.c		optional netgraph_patch
 netgraph/ng_pipe.c		optional netgraph_pipe
 netgraph/ng_ppp.c		optional netgraph_ppp
 netgraph/ng_pppoe.c		optional netgraph_pppoe
 netgraph/ng_pptpgre.c		optional netgraph_pptpgre
 netgraph/ng_pred1.c		optional netgraph_pred1
 netgraph/ng_rfc1490.c		optional netgraph_rfc1490
 netgraph/ng_socket.c		optional netgraph_socket
 netgraph/ng_split.c		optional netgraph_split
 netgraph/ng_sppp.c		optional netgraph_sppp
 netgraph/ng_tag.c		optional netgraph_tag
 netgraph/ng_tcpmss.c		optional netgraph_tcpmss
 netgraph/ng_tee.c		optional netgraph_tee
 netgraph/ng_tty.c		optional netgraph_tty
 netgraph/ng_vjc.c		optional netgraph_vjc
 netgraph/ng_vlan.c		optional netgraph_vlan
 netinet/accf_data.c		optional accept_filter_data inet
 netinet/accf_dns.c		optional accept_filter_dns inet
 netinet/accf_http.c		optional accept_filter_http inet
 netinet/if_atm.c		optional atm
 netinet/if_ether.c		optional inet ether
 netinet/igmp.c			optional inet
 netinet/in.c			optional inet
 netinet/in_debug.c		optional inet ddb
 netinet/in_kdtrace.c		optional inet | inet6
 netinet/ip_carp.c		optional inet carp | inet6 carp
 netinet/in_fib.c		optional inet
 netinet/in_gif.c		optional gif inet | netgraph_gif inet
 netinet/ip_gre.c		optional gre inet
 netinet/ip_id.c			optional inet
 netinet/in_mcast.c		optional inet
 netinet/in_pcb.c		optional inet | inet6
 netinet/in_pcbgroup.c		optional inet pcbgroup | inet6 pcbgroup
 netinet/in_proto.c		optional inet | inet6
 netinet/in_rmx.c		optional inet
 netinet/in_rss.c		optional inet rss
 netinet/ip_divert.c		optional inet ipdivert ipfirewall
 netinet/ip_ecn.c		optional inet | inet6
 netinet/ip_encap.c		optional inet | inet6
 netinet/ip_fastfwd.c		optional inet
 netinet/ip_icmp.c		optional inet | inet6
 netinet/ip_input.c		optional inet
 netinet/ip_ipsec.c		optional inet ipsec
 netinet/ip_mroute.c		optional mrouting inet
 netinet/ip_options.c		optional inet
 netinet/ip_output.c		optional inet
 netinet/ip_reass.c		optional inet
 netinet/raw_ip.c		optional inet | inet6
 netinet/cc/cc.c			optional inet | inet6
 netinet/cc/cc_newreno.c		optional inet | inet6
 netinet/sctp_asconf.c		optional inet sctp | inet6 sctp
 netinet/sctp_auth.c		optional inet sctp | inet6 sctp
 netinet/sctp_bsd_addr.c		optional inet sctp | inet6 sctp
 netinet/sctp_cc_functions.c	optional inet sctp | inet6 sctp
 netinet/sctp_crc32.c		optional inet sctp | inet6 sctp
 netinet/sctp_indata.c		optional inet sctp | inet6 sctp
 netinet/sctp_input.c		optional inet sctp | inet6 sctp
 netinet/sctp_output.c		optional inet sctp | inet6 sctp
 netinet/sctp_pcb.c		optional inet sctp | inet6 sctp
 netinet/sctp_peeloff.c		optional inet sctp | inet6 sctp
 netinet/sctp_ss_functions.c	optional inet sctp | inet6 sctp
 netinet/sctp_syscalls.c		optional inet sctp | inet6 sctp
 netinet/sctp_sysctl.c		optional inet sctp | inet6 sctp
 netinet/sctp_timer.c		optional inet sctp | inet6 sctp
 netinet/sctp_usrreq.c		optional inet sctp | inet6 sctp
 netinet/sctputil.c		optional inet sctp | inet6 sctp
 netinet/siftr.c			optional inet siftr alq | inet6 siftr alq
 netinet/tcp_debug.c		optional tcpdebug
 netinet/tcp_fastopen.c		optional inet tcp_rfc7413 | inet6 tcp_rfc7413
 netinet/tcp_hostcache.c		optional inet | inet6
 netinet/tcp_input.c		optional inet | inet6
 netinet/tcp_lro.c		optional inet | inet6
 netinet/tcp_output.c		optional inet | inet6
 netinet/tcp_offload.c		optional tcp_offload inet | tcp_offload inet6
 netinet/tcp_pcap.c		optional inet tcppcap | inet6 tcppcap
 netinet/tcp_reass.c		optional inet | inet6
 netinet/tcp_sack.c		optional inet | inet6
 netinet/tcp_subr.c		optional inet | inet6
 netinet/tcp_syncache.c		optional inet | inet6
 netinet/tcp_timer.c		optional inet | inet6
 netinet/tcp_timewait.c		optional inet | inet6
 netinet/tcp_usrreq.c		optional inet | inet6
 netinet/udp_usrreq.c		optional inet | inet6
 netinet/libalias/alias.c	optional libalias inet | netgraph_nat inet
 netinet/libalias/alias_db.c	optional libalias inet | netgraph_nat inet
 netinet/libalias/alias_mod.c	optional libalias | netgraph_nat
 netinet/libalias/alias_proxy.c	optional libalias inet | netgraph_nat inet
 netinet/libalias/alias_util.c	optional libalias inet | netgraph_nat inet
 netinet/libalias/alias_sctp.c	optional libalias inet | netgraph_nat inet
 netinet6/dest6.c		optional inet6
 netinet6/frag6.c		optional inet6
 netinet6/icmp6.c		optional inet6
 netinet6/in6.c			optional inet6
 netinet6/in6_cksum.c		optional inet6
 netinet6/in6_fib.c		optional inet6
 netinet6/in6_gif.c		optional gif inet6 | netgraph_gif inet6
 netinet6/in6_ifattach.c		optional inet6
 netinet6/in6_mcast.c		optional inet6
 netinet6/in6_pcb.c		optional inet6
 netinet6/in6_pcbgroup.c		optional inet6 pcbgroup
 netinet6/in6_proto.c		optional inet6
 netinet6/in6_rmx.c		optional inet6
 netinet6/in6_rss.c		optional inet6 rss
 netinet6/in6_src.c		optional inet6
 netinet6/ip6_forward.c		optional inet6
 netinet6/ip6_gre.c		optional gre inet6
 netinet6/ip6_id.c		optional inet6
 netinet6/ip6_input.c		optional inet6
 netinet6/ip6_mroute.c		optional mrouting inet6
 netinet6/ip6_output.c		optional inet6
 netinet6/ip6_ipsec.c		optional inet6 ipsec
 netinet6/mld6.c			optional inet6
 netinet6/nd6.c			optional inet6
 netinet6/nd6_nbr.c		optional inet6
 netinet6/nd6_rtr.c		optional inet6
 netinet6/raw_ip6.c		optional inet6
 netinet6/route6.c		optional inet6
 netinet6/scope6.c		optional inet6
 netinet6/sctp6_usrreq.c		optional inet6 sctp
 netinet6/udp6_usrreq.c		optional inet6
 netipsec/ipsec.c		optional ipsec inet | ipsec inet6
 netipsec/ipsec_input.c		optional ipsec inet | ipsec inet6
 netipsec/ipsec_mbuf.c		optional ipsec inet | ipsec inet6
 netipsec/ipsec_output.c		optional ipsec inet | ipsec inet6
 netipsec/key.c			optional ipsec inet | ipsec inet6
 netipsec/key_debug.c		optional ipsec inet | ipsec inet6
 netipsec/keysock.c		optional ipsec inet | ipsec inet6
 netipsec/xform_ah.c		optional ipsec inet | ipsec inet6
 netipsec/xform_esp.c		optional ipsec inet | ipsec inet6
 netipsec/xform_ipcomp.c		optional ipsec inet | ipsec inet6
 netipsec/xform_tcp.c		optional ipsec inet tcp_signature | \
 					 ipsec inet6 tcp_signature
 netnatm/natm.c			optional natm
 netnatm/natm_pcb.c		optional natm
 netnatm/natm_proto.c		optional natm
 netpfil/ipfw/dn_heap.c		optional inet dummynet
 netpfil/ipfw/dn_sched_fifo.c	optional inet dummynet
 netpfil/ipfw/dn_sched_prio.c	optional inet dummynet
 netpfil/ipfw/dn_sched_qfq.c	optional inet dummynet
 netpfil/ipfw/dn_sched_rr.c	optional inet dummynet
 netpfil/ipfw/dn_sched_wf2q.c	optional inet dummynet
 netpfil/ipfw/ip_dummynet.c	optional inet dummynet
 netpfil/ipfw/ip_dn_io.c		optional inet dummynet
 netpfil/ipfw/ip_dn_glue.c	optional inet dummynet
 netpfil/ipfw/ip_fw2.c		optional inet ipfirewall
 netpfil/ipfw/ip_fw_dynamic.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_log.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_pfil.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_sockopt.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_table.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_table_algo.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_table_value.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_iface.c	optional inet ipfirewall
 netpfil/ipfw/ip_fw_nat.c	optional inet ipfirewall_nat
 netpfil/pf/if_pflog.c		optional pflog pf inet
 netpfil/pf/if_pfsync.c		optional pfsync pf inet
 netpfil/pf/pf.c			optional pf inet
 netpfil/pf/pf_if.c		optional pf inet
 netpfil/pf/pf_ioctl.c		optional pf inet
 netpfil/pf/pf_lb.c		optional pf inet
 netpfil/pf/pf_norm.c		optional pf inet
 netpfil/pf/pf_osfp.c		optional pf inet
 netpfil/pf/pf_ruleset.c		optional pf inet
 netpfil/pf/pf_table.c		optional pf inet
 netpfil/pf/in4_cksum.c		optional pf inet
 netsmb/smb_conn.c		optional netsmb
 netsmb/smb_crypt.c		optional netsmb
 netsmb/smb_dev.c		optional netsmb
 netsmb/smb_iod.c		optional netsmb
 netsmb/smb_rq.c			optional netsmb
 netsmb/smb_smb.c		optional netsmb
 netsmb/smb_subr.c		optional netsmb
 netsmb/smb_trantcp.c		optional netsmb
 netsmb/smb_usr.c		optional netsmb
 nfs/bootp_subr.c		optional bootp nfscl
 nfs/krpc_subr.c			optional bootp nfscl
 nfs/nfs_diskless.c		optional nfscl nfs_root
 nfs/nfs_fha.c			optional nfsd
 nfs/nfs_lock.c			optional nfscl | nfslockd | nfsd
 nfs/nfs_nfssvc.c		optional nfscl | nfsd
 nlm/nlm_advlock.c		optional nfslockd | nfsd
 nlm/nlm_prot_clnt.c		optional nfslockd | nfsd
 nlm/nlm_prot_impl.c		optional nfslockd | nfsd
 nlm/nlm_prot_server.c		optional nfslockd | nfsd
 nlm/nlm_prot_svc.c		optional nfslockd | nfsd
 nlm/nlm_prot_xdr.c		optional nfslockd | nfsd
 nlm/sm_inter_xdr.c		optional nfslockd | nfsd
 
 # Linux Kernel Programming Interface
 compat/linuxkpi/common/src/linux_kmod.c		optional compat_linuxkpi \
 	no-depend compile-with "${LINUXKPI_C}"
 compat/linuxkpi/common/src/linux_compat.c	optional compat_linuxkpi \
 	no-depend compile-with "${LINUXKPI_C}"
 compat/linuxkpi/common/src/linux_pci.c		optional compat_linuxkpi pci \
 	no-depend compile-with "${LINUXKPI_C}"
 compat/linuxkpi/common/src/linux_idr.c		optional compat_linuxkpi \
 	no-depend compile-with "${LINUXKPI_C}"
 compat/linuxkpi/common/src/linux_radix.c	optional compat_linuxkpi \
 	no-depend compile-with "${LINUXKPI_C}"
 compat/linuxkpi/common/src/linux_usb.c		optional compat_linuxkpi usb \
 	no-depend compile-with "${LINUXKPI_C}"
 
 # OpenFabrics Enterprise Distribution (Infiniband)
 ofed/drivers/infiniband/core/addr.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/agent.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/cache.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 # XXX mad.c must be listed before cm.c so that sysinit sets occur in
 # the correct order.
 ofed/drivers/infiniband/core/mad.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/cm.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/ -Wno-unused-function"
 ofed/drivers/infiniband/core/cma.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/device.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/fmr_pool.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/iwcm.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/mad_rmpp.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/multicast.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/packer.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/peer_mem.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/sa_query.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/smi.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/sysfs.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/ucm.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/ucma.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/ud_header.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/umem.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/user_mad.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/uverbs_cmd.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/uverbs_main.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/uverbs_marshall.c	optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 ofed/drivers/infiniband/core/verbs.c		optional ofed		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/core/"
 
 ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c	optional ipoib		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 #ofed/drivers/infiniband/ulp/ipoib/ipoib_fs.c	optional ipoib		\
 #	no-depend							\
 #	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c	optional ipoib		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c	optional ipoib		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	optional ipoib	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 ofed/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	optional ipoib		\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 #ofed/drivers/infiniband/ulp/ipoib/ipoib_vlan.c	optional ipoib		\
 #	no-depend							\
 #	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/ipoib/"
 
 ofed/drivers/infiniband/ulp/sdp/sdp_bcopy.c	optional sdp inet	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
 ofed/drivers/infiniband/ulp/sdp/sdp_main.c	optional sdp inet 	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
 ofed/drivers/infiniband/ulp/sdp/sdp_rx.c	optional sdp inet 	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
 ofed/drivers/infiniband/ulp/sdp/sdp_cma.c	optional sdp inet 	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
 ofed/drivers/infiniband/ulp/sdp/sdp_tx.c	optional sdp inet 	\
 	no-depend							\
 	compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/ulp/sdp/"
 
 ofed/drivers/infiniband/hw/mlx4/alias_GUID.c    optional mlx4ib         \
         no-depend obj-prefix "mlx4ib_"                                  \
         compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/mcg.c           optional mlx4ib         \
         no-depend obj-prefix "mlx4ib_"                                  \
         compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/sysfs.c         optional mlx4ib         \
         no-depend obj-prefix "mlx4ib_"                                  \
         compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/cm.c            optional mlx4ib         \
         no-depend obj-prefix "mlx4ib_"                                  \
         compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/ah.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/cq.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/doorbell.c	optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/mad.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/main.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/mlx4_exp.c	optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/mr.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/qp.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/srq.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 ofed/drivers/infiniband/hw/mlx4/wc.c		optional mlx4ib		\
 	no-depend obj-prefix "mlx4ib_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/infiniband/hw/mlx4/"
 
 ofed/drivers/net/mlx4/alloc.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/catas.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/cmd.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/cq.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/eq.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/fw.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/icm.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/intf.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/main.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/mcg.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/ -Wno-unused"
 ofed/drivers/net/mlx4/mr.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/pd.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/port.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/profile.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/qp.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/reset.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/sense.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/srq.c			optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/resource_tracker.c        optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/sys_tune.c		optional mlx4ib | mlxen	\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 
 ofed/drivers/net/mlx4/en_cq.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_main.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_netdev.c		optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_port.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_resources.c		optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_rx.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 ofed/drivers/net/mlx4/en_tx.c			optional mlxen		\
 	no-depend obj-prefix "mlx4_"					\
 	compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/"
 
 dev/mlx5/mlx5_core/mlx5_alloc.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_cmd.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_cq.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_eq.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_flow_table.c		optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_fw.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_health.c		optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_mad.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_main.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_mcg.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_mr.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_pagealloc.c		optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_pd.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_port.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_qp.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_srq.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_transobj.c		optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_uar.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_vport.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_wq.c			optional mlx5 pci	\
 	no-depend compile-with "${OFED_C}"
 
 dev/mlx5/mlx5_en/mlx5_en_ethtool.c		optional mlx5en pci inet inet6	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_main.c			optional mlx5en pci inet inet6	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_tx.c			optional mlx5en pci inet inet6	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_flow_table.c		optional mlx5en pci inet inet6	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_rx.c			optional mlx5en pci inet inet6	\
 	no-depend compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_txrx.c			optional mlx5en pci inet inet6	\
 	no-depend compile-with "${OFED_C}"
 
 ofed/drivers/infiniband/hw/mthca/mthca_allocator.c	optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_av.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_catas.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_cmd.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_cq.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_eq.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_mad.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_main.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_mcg.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_memfree.c	optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_mr.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_pd.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_profile.c	optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_provider.c	optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_qp.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_reset.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_srq.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 ofed/drivers/infiniband/hw/mthca/mthca_uar.c		optional mthca	\
 	no-depend compile-with "${OFED_C}"
 
 # crypto support
 opencrypto/cast.c		optional crypto | ipsec
 opencrypto/criov.c		optional crypto | ipsec
 opencrypto/crypto.c		optional crypto | ipsec
 opencrypto/cryptodev.c		optional cryptodev
 opencrypto/cryptodev_if.m	optional crypto | ipsec
 opencrypto/cryptosoft.c		optional crypto | ipsec
 opencrypto/cryptodeflate.c	optional crypto | ipsec
 opencrypto/gmac.c		optional crypto | ipsec
 opencrypto/gfmult.c		optional crypto | ipsec
 opencrypto/rmd160.c		optional crypto | ipsec
 opencrypto/skipjack.c		optional crypto | ipsec
 opencrypto/xform.c		optional crypto | ipsec
 rpc/auth_none.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/auth_unix.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/authunix_prot.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/clnt_bck.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/clnt_dg.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/clnt_rc.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/clnt_vc.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/getnetconfig.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/replay.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/rpc_callmsg.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/rpc_generic.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/rpc_prot.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/rpcb_clnt.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/rpcb_prot.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/svc.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/svc_auth.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/svc_auth_unix.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/svc_dg.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/svc_generic.c		optional krpc | nfslockd | nfscl | nfsd
 rpc/svc_vc.c			optional krpc | nfslockd | nfscl | nfsd
 rpc/rpcsec_gss/rpcsec_gss.c	optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi
 rpc/rpcsec_gss/rpcsec_gss_conf.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi
 rpc/rpcsec_gss/rpcsec_gss_misc.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi
 rpc/rpcsec_gss/rpcsec_gss_prot.c optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi
 rpc/rpcsec_gss/svc_rpcsec_gss.c	optional krpc kgssapi | nfslockd kgssapi | nfscl kgssapi | nfsd kgssapi
 security/audit/audit.c		optional audit
 security/audit/audit_arg.c	optional audit
 security/audit/audit_bsm.c	optional audit
 security/audit/audit_bsm_klib.c	optional audit
 security/audit/audit_pipe.c	optional audit
 security/audit/audit_syscalls.c	standard
 security/audit/audit_trigger.c	optional audit
 security/audit/audit_worker.c	optional audit
 security/audit/bsm_domain.c	optional audit
 security/audit/bsm_errno.c	optional audit
 security/audit/bsm_fcntl.c	optional audit
 security/audit/bsm_socket_type.c	optional audit
 security/audit/bsm_token.c	optional audit
 security/mac/mac_audit.c	optional mac audit
 security/mac/mac_cred.c		optional mac
 security/mac/mac_framework.c	optional mac
 security/mac/mac_inet.c		optional mac inet | mac inet6
 security/mac/mac_inet6.c	optional mac inet6
 security/mac/mac_label.c	optional mac
 security/mac/mac_net.c		optional mac
 security/mac/mac_pipe.c		optional mac
 security/mac/mac_posix_sem.c	optional mac
 security/mac/mac_posix_shm.c	optional mac
 security/mac/mac_priv.c		optional mac
 security/mac/mac_process.c	optional mac
 security/mac/mac_socket.c	optional mac
 security/mac/mac_syscalls.c	standard
 security/mac/mac_system.c	optional mac
 security/mac/mac_sysv_msg.c	optional mac
 security/mac/mac_sysv_sem.c	optional mac
 security/mac/mac_sysv_shm.c	optional mac
 security/mac/mac_vfs.c		optional mac
 security/mac_biba/mac_biba.c	optional mac_biba
 security/mac_bsdextended/mac_bsdextended.c	optional mac_bsdextended
 security/mac_bsdextended/ugidfw_system.c	optional mac_bsdextended
 security/mac_bsdextended/ugidfw_vnode.c		optional mac_bsdextended
 security/mac_ifoff/mac_ifoff.c	optional mac_ifoff
 security/mac_lomac/mac_lomac.c	optional mac_lomac
 security/mac_mls/mac_mls.c	optional mac_mls
 security/mac_none/mac_none.c	optional mac_none
 security/mac_partition/mac_partition.c optional mac_partition
 security/mac_portacl/mac_portacl.c optional mac_portacl
 security/mac_seeotheruids/mac_seeotheruids.c optional mac_seeotheruids
 security/mac_stub/mac_stub.c	optional mac_stub
 security/mac_test/mac_test.c	optional mac_test
 teken/teken.c			optional sc | vt
 ufs/ffs/ffs_alloc.c		optional ffs
 ufs/ffs/ffs_balloc.c		optional ffs
 ufs/ffs/ffs_inode.c		optional ffs
 ufs/ffs/ffs_snapshot.c		optional ffs
 ufs/ffs/ffs_softdep.c		optional ffs
 ufs/ffs/ffs_subr.c		optional ffs
 ufs/ffs/ffs_tables.c		optional ffs
 ufs/ffs/ffs_vfsops.c		optional ffs
 ufs/ffs/ffs_vnops.c		optional ffs
 ufs/ffs/ffs_rawread.c		optional ffs directio
 ufs/ffs/ffs_suspend.c		optional ffs
 ufs/ufs/ufs_acl.c		optional ffs
 ufs/ufs/ufs_bmap.c		optional ffs
 ufs/ufs/ufs_dirhash.c		optional ffs
 ufs/ufs/ufs_extattr.c		optional ffs
 ufs/ufs/ufs_gjournal.c		optional ffs UFS_GJOURNAL
 ufs/ufs/ufs_inode.c		optional ffs
 ufs/ufs/ufs_lookup.c		optional ffs
 ufs/ufs/ufs_quota.c		optional ffs
 ufs/ufs/ufs_vfsops.c		optional ffs
 ufs/ufs/ufs_vnops.c		optional ffs
 vm/default_pager.c		standard
 vm/device_pager.c		standard
 vm/phys_pager.c			standard
 vm/redzone.c			optional DEBUG_REDZONE
 vm/sg_pager.c			standard
 vm/swap_pager.c			standard
 vm/uma_core.c			standard
 vm/uma_dbg.c			standard
 vm/memguard.c			optional DEBUG_MEMGUARD
 vm/vm_fault.c			standard
 vm/vm_glue.c			standard
 vm/vm_init.c			standard
 vm/vm_kern.c			standard
 vm/vm_map.c			standard
 vm/vm_meter.c			standard
 vm/vm_mmap.c			standard
 vm/vm_object.c			standard
 vm/vm_page.c			standard
 vm/vm_pageout.c			standard
 vm/vm_pager.c			standard
 vm/vm_phys.c			standard
 vm/vm_radix.c			standard
 vm/vm_reserv.c			standard
 vm/vm_domain.c			standard
 vm/vm_unix.c			standard
 vm/vm_zeroidle.c		standard
 vm/vnode_pager.c		standard
 xen/features.c			optional xenhvm
 xen/xenbus/xenbus_if.m		optional xenhvm
 xen/xenbus/xenbus.c		optional xenhvm
 xen/xenbus/xenbusb_if.m		optional xenhvm
 xen/xenbus/xenbusb.c		optional xenhvm
 xen/xenbus/xenbusb_front.c	optional xenhvm
 xen/xenbus/xenbusb_back.c	optional xenhvm
 xen/xenmem/xenmem_if.m		optional xenhvm
 xdr/xdr.c			optional krpc | nfslockd | nfscl | nfsd
 xdr/xdr_array.c			optional krpc | nfslockd | nfscl | nfsd
 xdr/xdr_mbuf.c			optional krpc | nfslockd | nfscl | nfsd
 xdr/xdr_mem.c			optional krpc | nfslockd | nfscl | nfsd
 xdr/xdr_reference.c		optional krpc | nfslockd | nfscl | nfsd
 xdr/xdr_sizeof.c		optional krpc | nfslockd | nfscl | nfsd
Index: projects/release-pkg/sys/conf
===================================================================
--- projects/release-pkg/sys/conf	(revision 293335)
+++ projects/release-pkg/sys/conf	(revision 293336)

Property changes on: projects/release-pkg/sys/conf
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys/conf:r289091-289119,289158,289371-289384,293171-293335
Index: projects/release-pkg/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
===================================================================
--- projects/release-pkg/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c	(revision 293335)
+++ projects/release-pkg/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c	(revision 293336)
@@ -1,1724 +1,1719 @@
 /**************************************************************************
 
 Copyright (c) 2007, Chelsio Inc.
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 
  1. Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.
 
  2. Neither the name of the Chelsio Corporation nor the names of its
     contributors may be used to endorse or promote products derived from
     this software without specific prior written permission.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 
 ***************************************************************************/
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 
 #ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <sys/pciio.h>
 #include <sys/conf.h>
 #include <machine/bus.h>
 #include <machine/resource.h>
 #include <sys/bus_dma.h>
 #include <sys/rman.h>
 #include <sys/ioccom.h>
 #include <sys/mbuf.h>
 #include <sys/rwlock.h>
 #include <sys/linker.h>
 #include <sys/firmware.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/queue.h>
 #include <sys/taskqueue.h>
 #include <sys/proc.h>
 #include <sys/uio.h>
 
 #include <net/route.h>
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
+#include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcpip.h>
 
 #include <rdma/ib_verbs.h>
 #include <linux/idr.h>
 #include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 
 #include <cxgb_include.h>
 #include <ulp/tom/cxgb_tom.h>
 #include <ulp/tom/cxgb_toepcb.h>
 #include <ulp/iw_cxgb/iw_cxgb_ib_intfc.h>
 #include <rdma/ib_verbs.h>
 #include <linux/idr.h>
 
 #include <ulp/iw_cxgb/iw_cxgb_wr.h>
 #include <ulp/iw_cxgb/iw_cxgb_hal.h>
 #include <ulp/iw_cxgb/iw_cxgb_provider.h>
 #include <ulp/iw_cxgb/iw_cxgb_cm.h>
 #include <ulp/iw_cxgb/iw_cxgb.h>
 
 #ifdef KTR
 static char *states[] = {
 	"idle",
 	"listen",
 	"connecting",
 	"mpa_wait_req",
 	"mpa_req_sent",
 	"mpa_req_rcvd",
 	"mpa_rep_sent",
 	"fpdu_mode",
 	"aborting",
 	"closing",
 	"moribund",
 	"dead",
 	NULL,
 };
 #endif
 
 SYSCTL_NODE(_hw, OID_AUTO, iw_cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters");
 
 static int ep_timeout_secs = 60;
 SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0,
     "CM Endpoint operation timeout in seconds (default=60)");
 
 static int mpa_rev = 1;
 SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0,
     "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)");
 
 static int markers_enabled = 0;
 SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0,
     "Enable MPA MARKERS (default(0)=disabled)");
 
 static int crc_enabled = 1;
 SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0,
     "Enable MPA CRC (default(1)=enabled)");
 
 static int rcv_win = 256 * 1024;
 SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0,
     "TCP receive window in bytes (default=256KB)");
 
 static int snd_win = 32 * 1024;
 SYSCTL_INT(_hw_iw_cxgb, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0,
     "TCP send window in bytes (default=32KB)");
 
 static unsigned int nocong = 0;
 SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, nocong, CTLFLAG_RWTUN, &nocong, 0,
     "Turn off congestion control (default=0)");
 
 static unsigned int cong_flavor = 1;
 SYSCTL_UINT(_hw_iw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RWTUN, &cong_flavor, 0,
     "TCP Congestion control flavor (default=1)");
 
 static void ep_timeout(void *arg);
 static void connect_reply_upcall(struct iwch_ep *ep, int status);
 static int iwch_so_upcall(struct socket *so, void *arg, int waitflag);
 
 /*
  * Cruft to offload socket upcalls onto thread.
  */
 static struct mtx req_lock;
 static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list;
 static struct task iw_cxgb_task;
 static struct taskqueue *iw_cxgb_taskq;
 static void process_req(void *ctx, int pending);
 
 static void
 start_ep_timer(struct iwch_ep *ep)
 {
 	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
 	if (callout_pending(&ep->timer)) {
 		CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep);
 		callout_deactivate(&ep->timer);
 		callout_drain(&ep->timer);
 	} else {
 		/*
 		 * XXX this looks racy
 		 */
 		get_ep(&ep->com);
 		callout_init(&ep->timer, 1);
 	}
 	callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep);
 }
 
 static void
 stop_ep_timer(struct iwch_ep *ep)
 {
 	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
 	if (!callout_pending(&ep->timer)) {
 		CTR3(KTR_IW_CXGB, "%s timer stopped when its not running!  ep %p state %u\n",
                        __func__, ep, ep->com.state);
 		return;
 	}
 	callout_drain(&ep->timer);
 	put_ep(&ep->com);
 }
 
 static int
 set_tcpinfo(struct iwch_ep *ep)
 {
 	struct socket *so = ep->com.so;
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp;
 	struct toepcb *toep;
 	int rc = 0;
 
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
 	if ((tp->t_flags & TF_TOE) == 0) {
 		rc = EINVAL;
 		printf("%s: connection NOT OFFLOADED!\n", __func__);
 		goto done;
 	}
 	toep = tp->t_toe;
 
 	ep->hwtid = toep->tp_tid;
 	ep->snd_seq = tp->snd_nxt;
 	ep->rcv_seq = tp->rcv_nxt;
 	ep->emss = tp->t_maxseg;
 	if (ep->emss < 128)
 		ep->emss = 128;
 done:
 	INP_WUNLOCK(inp);
 	return (rc);
 
 }
 
 static enum iwch_ep_state
 state_read(struct iwch_ep_common *epc)
 {
 	enum iwch_ep_state state;
 
 	mtx_lock(&epc->lock);
 	state = epc->state;
 	mtx_unlock(&epc->lock);
 	return state;
 }
 
 static void
 __state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
 {
 	epc->state = new;
 }
 
 static void
 state_set(struct iwch_ep_common *epc, enum iwch_ep_state new)
 {
 
 	mtx_lock(&epc->lock);
 	CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]);
 	__state_set(epc, new);
 	mtx_unlock(&epc->lock);
 	return;
 }
 
 static void *
 alloc_ep(int size, int flags)
 {
 	struct iwch_ep_common *epc;
 
 	epc = malloc(size, M_DEVBUF, flags);
 	if (epc) {
 		memset(epc, 0, size);
 		refcount_init(&epc->refcount, 1);
 		mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK);
 		cv_init(&epc->waitq, "iwch_epc cv");
 	}
 	CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc);
 	return epc;
 }
 
 void __free_ep(struct iwch_ep_common *epc)
 {
 	CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
 	KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so));
 	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
 	free(epc, M_DEVBUF);
 }
 
-static struct rtentry *
+static int
 find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
-    __be16 peer_port, u8 tos)
+    __be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
 {
-        struct route iproute;
-        struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;
- 
-        bzero(&iproute, sizeof iproute);
-	dst->sin_family = AF_INET;
-	dst->sin_len = sizeof *dst;
-        dst->sin_addr.s_addr = peer_ip;
- 
-        rtalloc(&iproute);
-	return iproute.ro_rt;
+	struct in_addr addr;
+
+	addr.s_addr = peer_ip;
+	return (fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4));
 }
 
 static void
 close_socket(struct iwch_ep_common *epc, int close)
 {
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
 	SOCK_LOCK(epc->so);
 	soupcall_clear(epc->so, SO_RCV);
 	SOCK_UNLOCK(epc->so);
 	if (close)
 		soclose(epc->so);
 	else
 		soshutdown(epc->so, SHUT_WR|SHUT_RD);
 	epc->so = NULL;
 }
 
 static void
 shutdown_socket(struct iwch_ep_common *epc)
 {
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]);
 	soshutdown(epc->so, SHUT_WR);
 }
 
 static void
 abort_socket(struct iwch_ep *ep)
 {
 	struct sockopt sopt;
 	int err;
 	struct linger l;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	l.l_onoff = 1;
 	l.l_linger = 0;
 
 	/* linger_time of 0 forces RST to be sent */
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = SOL_SOCKET;
 	sopt.sopt_name = SO_LINGER;
 	sopt.sopt_val = (caddr_t)&l;
 	sopt.sopt_valsize = sizeof l;
 	sopt.sopt_td = NULL;
 	err = sosetopt(ep->com.so, &sopt);
 	if (err) 
 		printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err);
 }
 
 static void
 send_mpa_req(struct iwch_ep *ep)
 {
 	int mpalen;
 	struct mpa_message *mpa;
 	struct mbuf *m;
 	int err;
 
 	CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen);
 
 	mpalen = sizeof(*mpa) + ep->plen;
 	m = m_gethdr(mpalen, M_NOWAIT);
 	if (m == NULL) {
 		connect_reply_upcall(ep, -ENOMEM);
 		return;
 	}
 	mpa = mtod(m, struct mpa_message *);
 	m->m_len = mpalen;
 	m->m_pkthdr.len = mpalen;
 	memset(mpa, 0, sizeof(*mpa));
 	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
 		     (markers_enabled ? MPA_MARKERS : 0);
 	mpa->private_data_size = htons(ep->plen);
 	mpa->revision = mpa_rev;
 	if (ep->plen)
 		memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
 
 	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
 	if (err) {
 		m_freem(m);
 		connect_reply_upcall(ep, -ENOMEM);
 		return;
 	}
 		
 	start_ep_timer(ep);
 	state_set(&ep->com, MPA_REQ_SENT);
 	return;
 }
 
 static int
 send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 {
 	int mpalen;
 	struct mpa_message *mpa;
 	struct mbuf *m;
 	int err;
 
 	CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen);
 
 	mpalen = sizeof(*mpa) + plen;
 
 	m = m_gethdr(mpalen, M_NOWAIT);
 	if (m == NULL) {
 		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
 		return (-ENOMEM);
 	}
 	mpa = mtod(m, struct mpa_message *);
 	m->m_len = mpalen;
 	m->m_pkthdr.len = mpalen;
 	memset(mpa, 0, sizeof(*mpa));
 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 	mpa->flags = MPA_REJECT;
 	mpa->revision = mpa_rev;
 	mpa->private_data_size = htons(plen);
 	if (plen)
 		memcpy(mpa->private_data, pdata, plen);
 	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
 	PANIC_IF(err);
 	return 0;
 }
 
 static int
 send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 {
 	int mpalen;
 	struct mpa_message *mpa;
 	struct mbuf *m;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen);
 
 	mpalen = sizeof(*mpa) + plen;
 
 	m = m_gethdr(mpalen, M_NOWAIT);
 	if (m == NULL) {
 		printf("%s - cannot alloc mbuf!\n", __FUNCTION__);
 		return (-ENOMEM);
 	}
 	mpa = mtod(m, struct mpa_message *);
 	m->m_len = mpalen;
 	m->m_pkthdr.len = mpalen;
 	memset(mpa, 0, sizeof(*mpa));
 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
 		     (markers_enabled ? MPA_MARKERS : 0);
 	mpa->revision = mpa_rev;
 	mpa->private_data_size = htons(plen);
 	if (plen)
 		memcpy(mpa->private_data, pdata, plen);
 
 	state_set(&ep->com, MPA_REP_SENT);
 	return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, 
 		ep->com.thread);
 }
 
 static void
 close_complete_upcall(struct iwch_ep *ep)
 {
 	struct iw_cm_event event;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CLOSE;
 	if (ep->com.cm_id) {
 		CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d",
 		     ep, ep->com.cm_id, ep->hwtid);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 		ep->com.cm_id->rem_ref(ep->com.cm_id);
 		ep->com.cm_id = NULL;
 		ep->com.qp = NULL;
 	}
 }
 
 static void
 abort_connection(struct iwch_ep *ep)
 {
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	state_set(&ep->com, ABORTING);
 	abort_socket(ep);
 	close_socket(&ep->com, 0);
 	close_complete_upcall(ep);
 	state_set(&ep->com, DEAD);
 	put_ep(&ep->com);
 }
 
 static void
 peer_close_upcall(struct iwch_ep *ep)
 {
 	struct iw_cm_event event;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_DISCONNECT;
 	if (ep->com.cm_id) {
 		CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d",
 		     ep, ep->com.cm_id, ep->hwtid);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 	}
 }
 
 static void
 peer_abort_upcall(struct iwch_ep *ep)
 {
 	struct iw_cm_event event;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CLOSE;
 	event.status = ECONNRESET;
 	if (ep->com.cm_id) {
 		CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep,
 		     ep->com.cm_id, ep->hwtid);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 		ep->com.cm_id->rem_ref(ep->com.cm_id);
 		ep->com.cm_id = NULL;
 		ep->com.qp = NULL;
 	}
 }
 
 static void
 connect_reply_upcall(struct iwch_ep *ep, int status)
 {
 	struct iw_cm_event event;
 
 	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CONNECT_REPLY;
 	event.status = status;
 	event.local_addr = ep->com.local_addr;
 	event.remote_addr = ep->com.remote_addr;
 
 	if ((status == 0) || (status == ECONNREFUSED)) {
 		event.private_data_len = ep->plen;
 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 	}
 	if (ep->com.cm_id) {
 		CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep,
 		     ep->hwtid, status);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 	}
 	if (status < 0) {
 		ep->com.cm_id->rem_ref(ep->com.cm_id);
 		ep->com.cm_id = NULL;
 		ep->com.qp = NULL;
 	}
 }
 
 static void
 connect_request_upcall(struct iwch_ep *ep)
 {
 	struct iw_cm_event event;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
 	event.local_addr = ep->com.local_addr;
 	event.remote_addr = ep->com.remote_addr;
 	event.private_data_len = ep->plen;
 	event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 	event.provider_data = ep;
 	event.so = ep->com.so;
 	if (state_read(&ep->parent_ep->com) != DEAD) {
 		get_ep(&ep->com);
 		ep->parent_ep->com.cm_id->event_handler(
 						ep->parent_ep->com.cm_id,
 						&event);
 	}
 	put_ep(&ep->parent_ep->com);
 }
 
 static void
 established_upcall(struct iwch_ep *ep)
 {
 	struct iw_cm_event event;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_ESTABLISHED;
 	if (ep->com.cm_id) {
 		CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 	}
 }
 
 static void
 process_mpa_reply(struct iwch_ep *ep)
 {
 	struct mpa_message *mpa;
 	u16 plen;
 	struct iwch_qp_attributes attrs;
 	enum iwch_qp_attr_mask mask;
 	int err;
 	struct mbuf *top, *m;
 	int flags = MSG_DONTWAIT;
 	struct uio uio;
 	int len;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 
 	/*
 	 * Stop mpa timer.  If it expired, then the state has
 	 * changed and we bail since ep_timeout already aborted
 	 * the connection.
 	 */
 	stop_ep_timer(ep);
 	if (state_read(&ep->com) != MPA_REQ_SENT)
 		return;
 
 	uio.uio_resid = len = 1000000;
 	uio.uio_td = ep->com.thread;
 	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
 	if (err) {
 		if (err == EWOULDBLOCK) {
 			start_ep_timer(ep);
 			return;
 		}
 		err = -err;
 		goto err;
 	}
 
 	if (ep->com.so->so_rcv.sb_mb) {
 		printf("%s data after soreceive called! so %p sb_mb %p top %p\n", 
 			__FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
 	}
 		
 	m = top;
 	do {
 		/*
 		 * If we get more than the supported amount of private data
 		 * then we must fail this connection.
 		 */
 		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
 			err = (-EINVAL);
 			goto err;
 		}
 
 		/*
 		 * copy the new data into our accumulation buffer.
 		 */
 		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
 		ep->mpa_pkt_len += m->m_len;
 		if (!m->m_next)
 			m = m->m_nextpkt;
 		else
 			m = m->m_next;
 	} while (m);
 
 	m_freem(top);
 
 	/*
 	 * if we don't even have the mpa message, then bail.
 	 */
 	if (ep->mpa_pkt_len < sizeof(*mpa))
 		return;
 	mpa = (struct mpa_message *)ep->mpa_pkt;
 
 	/* Validate MPA header. */
 	if (mpa->revision != mpa_rev) {
 		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
 		err = EPROTO;
 		goto err;
 	}
 	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
 		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
 		err = EPROTO;
 		goto err;
 	}
 
 	plen = ntohs(mpa->private_data_size);
 
 	/*
 	 * Fail if there's too much private data.
 	 */
 	if (plen > MPA_MAX_PRIVATE_DATA) {
 		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
 		err = EPROTO;
 		goto err;
 	}
 
 	/*
 	 * If plen does not account for pkt size
 	 */
 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
 		CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len);
 		err = EPROTO;
 		goto err;
 	}
 
 	ep->plen = (u8) plen;
 
 	/*
 	 * If we don't have all the pdata yet, then bail.
 	 * We'll continue process when more data arrives.
 	 */
 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
 		return;
 
 	if (mpa->flags & MPA_REJECT) {
 		err = ECONNREFUSED;
 		goto err;
 	}
 
 	/*
 	 * If we get here we have accumulated the entire mpa
 	 * start reply message including private data. And
 	 * the MPA header is valid.
 	 */
 	CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__);
 	state_set(&ep->com, FPDU_MODE);
 	ep->mpa_attr.initiator = 1;
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
 	ep->mpa_attr.version = mpa_rev;
 	if (set_tcpinfo(ep)) {
 		printf("%s set_tcpinfo error\n", __FUNCTION__);
 		goto err;
 	}
 	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
 	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
 	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
 	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
 
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
 	attrs.max_ord = ep->ord;
 	attrs.llp_stream_handle = ep;
 	attrs.next_state = IWCH_QP_STATE_RTS;
 
 	mask = IWCH_QP_ATTR_NEXT_STATE |
 	    IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR |
 	    IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD;
 
 	/* bind QP and TID with INIT_WR */
 	err = iwch_modify_qp(ep->com.qp->rhp,
 			     ep->com.qp, mask, &attrs, 1);
 	if (!err)
 		goto out;
 err:
 	abort_connection(ep);
 out:
 	connect_reply_upcall(ep, err);
 	return;
 }
 
 /*
  * Passive side of the MPA handshake.
  *
  * Receives whatever is queued on the endpoint's socket, appends it to
  * ep->mpa_pkt, and once a complete, valid MPA start request (header plus
  * private data) has been accumulated moves the endpoint to MPA_REQ_RCVD
  * and delivers the connect request upcall.
  *
  * The endpoint timer is stopped on entry and restarted on every path that
  * returns while more data is still expected.  Any receive or validation
  * failure aborts the connection.
  */
 static void
 process_mpa_request(struct iwch_ep *ep)
 {
 	struct mpa_message *mpa;
 	u16 plen;
 	int flags = MSG_DONTWAIT;
 	struct mbuf *top, *m;
 	int err;
 	struct uio uio;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 
 	/*
 	 * Stop mpa timer.  If it expired, then the state has
 	 * changed and we bail since ep_timeout already aborted
 	 * the connection.
 	 */
 	stop_ep_timer(ep);
 	if (state_read(&ep->com) != MPA_REQ_WAIT)
 		return;
 
 	uio.uio_resid = 1000000;
 	uio.uio_td = ep->com.thread;
 	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
 	if (err) {
 		/* Nothing queued yet: re-arm the timer and wait for more. */
 		if (err == EWOULDBLOCK) {
 			start_ep_timer(ep);
 			return;
 		}
 		goto err;
 	}
 
 	m = top;
 	do {
 
 		/*
 		 * If we get more than the supported amount of private data
 		 * then we must fail this connection.
 		 */
 		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
 			CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__, 
 				ep->mpa_pkt_len + m->m_len);
 			/* We own the chain soreceive() handed back. */
 			m_freem(top);
 			goto err;
 		}
 
 
 		/*
 		 * Copy the new data into our accumulation buffer.
 		 */
 		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
 		ep->mpa_pkt_len += m->m_len;
 
 		if (!m->m_next)
 			m = m->m_nextpkt;
 		else
 			m = m->m_next;
 	} while (m);
 
 	m_freem(top);
 
 	/*
 	 * If we don't even have the mpa message, then bail.
 	 * We'll continue process when more data arrives.
 	 */
 	if (ep->mpa_pkt_len < sizeof(*mpa)) {
 		start_ep_timer(ep);
 		CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__, 
 			ep->mpa_pkt_len);
 		return;
 	}
 	mpa = (struct mpa_message *) ep->mpa_pkt;
 
 	/*
 	 * Validate MPA Header.
 	 */
 	if (mpa->revision != mpa_rev) {
 		CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision);
 		goto err;
 	}
 
 	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
 		CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key);
 		goto err;
 	}
 
 	plen = ntohs(mpa->private_data_size);
 
 	/*
 	 * Fail if there's too much private data.
 	 */
 	if (plen > MPA_MAX_PRIVATE_DATA) {
 		CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen);
 		goto err;
 	}
 
 	/*
 	 * Fail if more bytes arrived than the header plus the advertised
 	 * private data length account for.
 	 */
 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
 		CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__, 
 			ep->mpa_pkt_len);
 		goto err;
 	}
 	ep->plen = (u8) plen;
 
 	/*
 	 * If we don't have all the pdata yet, then bail.
 	 */
 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
 		start_ep_timer(ep);
 		CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__, 
 			ep->mpa_pkt_len);
 		return;
 	}
 
 	/*
 	 * If we get here we have accumulated the entire mpa
 	 * start request message including private data.
 	 */
 	ep->mpa_attr.initiator = 0;
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
 	ep->mpa_attr.version = mpa_rev;
 	if (set_tcpinfo(ep)) {
 		printf("%s set_tcpinfo error\n", __FUNCTION__);
 		goto err;
 	}
 	CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, "
 	     "xmit_marker_enabled=%d, version=%d", __FUNCTION__,
 	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
 	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
 
 	state_set(&ep->com, MPA_REQ_RCVD);
 
 	/* drive upcall */
 	connect_request_upcall(ep);
 	return;
 err:
 	abort_connection(ep);
 	return;
 }
 
 /*
  * React to the peer having closed its side of the connection.
  *
  * The endpoint state is advanced under ep->com.lock.  Depending on the
  * state found, a local disconnect is started and/or the endpoint
  * reference is dropped once the lock has been released.
  */
 static void
 process_peer_close(struct iwch_ep *ep)
 {
 	struct iwch_qp_attributes attrs;
 	int need_disconnect = 1;
 	int drop_ref = 0;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 
 	mtx_lock(&ep->com.lock);
 	switch (ep->com.state) {
 	case MPA_REQ_SENT:
 		__state_set(&ep->com, CLOSING);
 		connect_reply_upcall(ep, -ECONNRESET);
 		break;
 	case MPA_REQ_WAIT:
 	case MPA_REQ_RCVD:
 	case MPA_REP_SENT:
 		/*
 		 * Just start closing.  No reference is dropped here; for
 		 * MPA_REQ_RCVD it is kept until the ULP accepts or rejects
 		 * the CR.
 		 */
 		__state_set(&ep->com, CLOSING);
 		break;
 	case FPDU_MODE:
 		start_ep_timer(ep);
 		__state_set(&ep->com, CLOSING);
 		attrs.next_state = IWCH_QP_STATE_CLOSING;
 		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
 		    IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
 		peer_close_upcall(ep);
 		break;
 	case CLOSING:
 		__state_set(&ep->com, MORIBUND);
 		need_disconnect = 0;
 		break;
 	case MORIBUND:
 		/* Both directions are done: tear the endpoint down. */
 		stop_ep_timer(ep);
 		if (ep->com.cm_id && ep->com.qp) {
 			attrs.next_state = IWCH_QP_STATE_IDLE;
 			iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
 			    IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
 		}
 		close_socket(&ep->com, 0);
 		close_complete_upcall(ep);
 		__state_set(&ep->com, DEAD);
 		drop_ref = 1;
 		need_disconnect = 0;
 		break;
 	case ABORTING:
 	case DEAD:
 		need_disconnect = 0;
 		break;
 	default:
 		PANIC_IF(1);
 	}
 	mtx_unlock(&ep->com.lock);
 
 	if (need_disconnect)
 		iwch_ep_disconnect(ep, 0, M_NOWAIT);
 	if (drop_ref)
 		put_ep(&ep->com);
 }
 
 /*
  * Handle an error reported on the endpoint's socket.
  *
  * Performs the per-state cleanup under ep->com.lock, then (unless the
  * endpoint is already ABORTING) closes the socket and marks the endpoint
  * DEAD.  The endpoint reference is dropped only after the lock has been
  * released, matching the other event handlers in this file, so the mutex
  * is never unlocked after the put.
  */
 static void
 process_conn_error(struct iwch_ep *ep)
 {
 	struct iwch_qp_attributes attrs;
 	int ret;
 	int release = 0;
 
 	mtx_lock(&ep->com.lock);
 	CTR3(KTR_IW_CXGB, "%s ep %p state %u", __func__, ep, ep->com.state);
 	switch (ep->com.state) {
 	case MPA_REQ_WAIT:
 		stop_ep_timer(ep);
 		break;
 	case MPA_REQ_SENT:
 		stop_ep_timer(ep);
 		connect_reply_upcall(ep, -ECONNRESET);
 		break;
 	case MPA_REP_SENT:
 		ep->com.rpl_err = ECONNRESET;
 		CTR1(KTR_IW_CXGB, "waking up ep %p", ep);
 		break;
 	case MPA_REQ_RCVD:
 
 		/*
 		 * We're gonna mark this puppy DEAD, but keep
 		 * the reference on it until the ULP accepts or
 		 * rejects the CR.
 		 */
 		break;
 	case MORIBUND:
 	case CLOSING:
 		stop_ep_timer(ep);
 		/*FALLTHROUGH*/
 	case FPDU_MODE:
 		if (ep->com.cm_id && ep->com.qp) {
 			attrs.next_state = IWCH_QP_STATE_ERROR;
 			ret = iwch_modify_qp(ep->com.qp->rhp,
 				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
 				     &attrs, 1);
 			if (ret)
 				log(LOG_ERR,
 				       "%s - qp <- error failed!\n",
 				       __FUNCTION__);
 		}
 		peer_abort_upcall(ep);
 		break;
 	case ABORTING:
 		break;
 	case DEAD:
 		mtx_unlock(&ep->com.lock);
 		CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__, 
 			ep->com.so->so_error);
 		return;
 	default:
 		PANIC_IF(1);
 		break;
 	}
 
 	if (ep->com.state != ABORTING) {
 		close_socket(&ep->com, 0);
 		__state_set(&ep->com, DEAD);
 		release = 1;
 	}
 	mtx_unlock(&ep->com.lock);
 
 	/* Drop the reference only once the endpoint lock is no longer held. */
 	if (release)
 		put_ep(&ep->com);
 	return;
 }
 
 /*
  * Handle the socket reporting that it is fully disconnected.
  *
  * CLOSING advances to MORIBUND.  MORIBUND finishes the teardown: the timer
  * is stopped, the QP (if one is attached) is moved to IDLE, the socket is
  * closed, the close-complete upcall is delivered and the endpoint becomes
  * DEAD; its reference is dropped after the lock is released.  ABORTING is
  * ignored, and any other state panics.
  */
 static void
 process_close_complete(struct iwch_ep *ep)
 {
 	struct iwch_qp_attributes attrs;
 	int release = 0;
 
 	/* Assert before the trace below dereferences ep. */
 	PANIC_IF(!ep);
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 
 	/* The cm_id may be null if we failed to connect */
 	mtx_lock(&ep->com.lock);
 	switch (ep->com.state) {
 	case CLOSING:
 		__state_set(&ep->com, MORIBUND);
 		break;
 	case MORIBUND:
 		stop_ep_timer(ep);
 		if ((ep->com.cm_id) && (ep->com.qp)) {
 			attrs.next_state = IWCH_QP_STATE_IDLE;
 			iwch_modify_qp(ep->com.qp->rhp,
 					     ep->com.qp,
 					     IWCH_QP_ATTR_NEXT_STATE,
 					     &attrs, 1);
 		}
 		/* Second argument is 1 only for endpoints with a parent. */
 		close_socket(&ep->com, ep->parent_ep ? 1 : 0);
 		close_complete_upcall(ep);
 		__state_set(&ep->com, DEAD);
 		release = 1;
 		break;
 	case ABORTING:
 		break;
 	case DEAD:
 	default:
 		PANIC_IF(1);
 		break;
 	}
 	mtx_unlock(&ep->com.lock);
 	if (release)
 		put_ep(&ep->com);
 	return;
 }
 
 /*
  * T3A does 3 things when a TERM is received:
  * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
  * 2) generate an async event on the QP with the TERMINATE opcode
  * 3) post a TERMINATE opcode cqe into the associated CQ.
  *
  * For (1), we save the message in the qp for later consumer consumption.
  * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
  * For (3), we toss the CQE in cxio_poll_cq().
  *
  * terminate() handles case (1): when the endpoint is in FPDU_MODE the
  * payload following the CPL header is copied into the QP's terminate
  * buffer.  The mbuf is always freed and 0 is always returned.
  */
 static int
 terminate(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct tom_data *td = qs->adap->tom_softc;
 	/* The tid is carried in bits 8..27 of the second descriptor word. */
 	uint32_t word1 = ((uint32_t *)r)[1];
 	unsigned int tid = (ntohl(word1) >> 8) & 0xfffff;
 	struct toepcb *toep;
 	struct iwch_ep *ep;
 
 	toep = lookup_tid(&td->tid_maps, tid);
 	ep = toep->tp_inp->inp_socket->so_rcv.sb_upcallarg;
 
 	if (state_read(&ep->com) == FPDU_MODE) {
 		/* Strip the CPL header so only the TERM payload remains. */
 		m_adj(m, sizeof(struct cpl_rdma_terminate));
 
 		CTR4(KTR_IW_CXGB, "%s: tid %u, ep %p, saved %d bytes",
 		    __func__, tid, ep, m->m_len);
 
 		/*
 		 * NOTE(review): m->m_len is not checked against the size of
 		 * terminate_buffer here -- confirm the bound is guaranteed
 		 * elsewhere.
 		 */
 		m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer);
 		ep->com.qp->attr.terminate_msg_len = m->m_len;
 		ep->com.qp->attr.is_terminate_local = 0;
 	}
 
 	m_freem(m);
 	return (0);
 }
 
 /*
  * CPL_RDMA_EC_STATUS handler.  A non-zero status in the message is treated
  * as a bad close: the endpoint timer is stopped, the QP is moved to the
  * ERROR state and the connection is aborted.  The mbuf is always freed and
  * 0 is always returned.
  */
 static int
 ec_status(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct tom_data *td = qs->adap->tom_softc;
 	struct cpl_rdma_ec_status *rep = mtod(m, void *);
 	unsigned int tid = GET_TID(rep);
 	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
 	struct iwch_ep *ep = toep->tp_inp->inp_socket->so_rcv.sb_upcallarg;
 	struct iwch_qp_attributes attrs;
 
 	if (!rep->status)
 		goto done;
 
 	CTR1(KTR_IW_CXGB, "%s BAD CLOSE - Aborting", __FUNCTION__);
 	stop_ep_timer(ep);
 	attrs.next_state = IWCH_QP_STATE_ERROR;
 	iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
 	    IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
 	abort_connection(ep);
 
 done:
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Endpoint timer callback; arg is the struct iwch_ep the timer belongs to.
  *
  * In the states where a timeout is expected the endpoint is switched to
  * ABORTING under its lock and the connection is aborted after the lock is
  * dropped.  For CLOSING/MORIBUND endpoints that have both a cm_id and a QP,
  * the QP is first moved to ERROR.  An endpoint reference is always dropped
  * on the way out.
  */
 static void
 ep_timeout(void *arg)
 {
 	struct iwch_ep *ep = arg;
 	struct iwch_qp_attributes attrs;
 	int qp_to_error = 0;
 	int do_abort = 1;
 
 	mtx_lock(&ep->com.lock);
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	switch (ep->com.state) {
 	case MPA_REQ_WAIT:
 		__state_set(&ep->com, ABORTING);
 		break;
 	case MPA_REQ_SENT:
 		__state_set(&ep->com, ABORTING);
 		connect_reply_upcall(ep, -ETIMEDOUT);
 		break;
 	case CLOSING:
 	case MORIBUND:
 		if (ep->com.cm_id && ep->com.qp)
 			qp_to_error = 1;
 		__state_set(&ep->com, ABORTING);
 		break;
 	default:
 		CTR3(KTR_IW_CXGB, "%s unexpected state ep %p state %u\n",
 			__func__, ep, ep->com.state);
 		do_abort = 0;
 	}
 	mtx_unlock(&ep->com.lock);
 
 	if (qp_to_error) {
 		attrs.next_state = IWCH_QP_STATE_ERROR;
 		iwch_modify_qp(ep->com.qp->rhp, ep->com.qp,
 		    IWCH_QP_ATTR_NEXT_STATE, &attrs, 1);
 	}
 	if (do_abort)
 		abort_connection(ep);
 	put_ep(&ep->com);
 }
 
 /*
  * Reject a pending connection request on behalf of the ULP.
  *
  * Returns -ECONNRESET if the endpoint is already DEAD, otherwise 0.  The
  * endpoint must be in MPA_REQ_RCVD.  With mpa_rev 0 the connection is
  * simply aborted; otherwise an MPA reject carrying pdata is sent and the
  * socket is shut down.  An endpoint reference is dropped on every path.
  */
 int
 iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
 {
 	struct iwch_ep *ep = to_ep(cm_id);
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 
 	if (state_read(&ep->com) == DEAD) {
 		put_ep(&ep->com);
 		return (-ECONNRESET);
 	}
 	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
 
 	if (mpa_rev != 0) {
 		/*
 		 * Failures of either call are deliberately not reported to
 		 * the caller.
 		 * NOTE(review): 3 is not one of the SHUT_RD/SHUT_WR/SHUT_RDWR
 		 * values -- confirm soshutdown() accepts it.
 		 */
 		(void)send_mpa_reject(ep, pdata, pdata_len);
 		(void)soshutdown(ep->com.so, 3);
 	} else {
 		abort_connection(ep);
 	}
 
 	put_ep(&ep->com);
 	return 0;
 }
 
 /*
  * Accept a pending connection request on behalf of the ULP.
  *
  * Binds the QP named by conn_param->qpn to the endpoint, moves that QP to
  * RTS, sends the MPA reply carrying the caller's private data and, on
  * success, puts the endpoint into FPDU_MODE and delivers the established
  * upcall.
  *
  * Returns 0 on success, -ECONNRESET if the endpoint is already DEAD,
  * -EINVAL if the requested ord/ird exceed the device limits, or the error
  * returned by iwch_modify_qp()/send_mpa_reply().  An endpoint reference is
  * dropped on every path.
  */
 int
 iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 {
 	int err;
 	struct iwch_qp_attributes attrs;
 	enum iwch_qp_attr_mask mask;
 	struct iwch_ep *ep = to_ep(cm_id);
 	struct iwch_dev *h = to_iwch_dev(cm_id->device);
 	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 	if (state_read(&ep->com) == DEAD) {
 		err = -ECONNRESET;
 		goto err;
 	}
 
 	PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD);
 	PANIC_IF(!qp);
 
 	/* Refuse read depths beyond what the device advertises. */
 	if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) ||
 	    (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) {
 		abort_connection(ep);
 		err = -EINVAL;
 		goto err;
 	}
 
 	/* From here on the endpoint holds a cm_id reference (undone at err1). */
 	cm_id->add_ref(cm_id);
 	ep->com.cm_id = cm_id;
 	ep->com.qp = qp;
 
 	ep->com.rpl_err = 0;
 	ep->com.rpl_done = 0;
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
 	CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord);
 
 	/* bind QP to EP and move to RTS */
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
 	attrs.max_ord = ep->ord;
 	attrs.llp_stream_handle = ep;
 	attrs.next_state = IWCH_QP_STATE_RTS;
 
 	/* bind QP and TID with INIT_WR */
 	mask = IWCH_QP_ATTR_NEXT_STATE |
 			     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
 			     IWCH_QP_ATTR_MPA_ATTR |
 			     IWCH_QP_ATTR_MAX_IRD |
 			     IWCH_QP_ATTR_MAX_ORD;
 
 	err = iwch_modify_qp(ep->com.qp->rhp,
 			     ep->com.qp, mask, &attrs, 1);
 
 	if (err) 
 		goto err1;
 
 	err = send_mpa_reply(ep, conn_param->private_data,
  			     conn_param->private_data_len);
 	if (err)
 		goto err1;
 	state_set(&ep->com, FPDU_MODE);
 	established_upcall(ep);
 	put_ep(&ep->com);
 	return 0;
 err1:
 	/*
 	 * Unbind the cm_id and QP from the endpoint and drop the cm_id
 	 * reference taken above.
 	 * NOTE(review): if send_mpa_reply() failed, the QP was already
 	 * modified towards RTS and is not moved back here -- confirm that
 	 * is intended.
 	 */
 	ep->com.cm_id = NULL;
 	ep->com.qp = NULL;
 	cm_id->rem_ref(cm_id);
 err:
 	put_ep(&ep->com);
 	return err;
 }
 
 static int init_sock(struct iwch_ep_common *epc)
 {
 	int err;
 	struct sockopt sopt;
 	int on=1;
 
 	SOCK_LOCK(epc->so);
 	soupcall_set(epc->so, SO_RCV, iwch_so_upcall, epc);
 	epc->so->so_state |= SS_NBIO;
 	SOCK_UNLOCK(epc->so);
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = IPPROTO_TCP;
 	sopt.sopt_name = TCP_NODELAY;
 	sopt.sopt_val = (caddr_t)&on;
 	sopt.sopt_valsize = sizeof on;
 	sopt.sopt_td = NULL;
 	err = sosetopt(epc->so, &sopt);
 	if (err) 
 		printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err);
 
 	return 0;
 }
 
 static int 
 is_loopback_dst(struct iw_cm_id *cm_id)
 {
 	uint16_t port = cm_id->remote_addr.sin_port;
 	int ifa_present;
 
 	cm_id->remote_addr.sin_port = 0;
 	ifa_present = ifa_ifwithaddr_check(
 	    (struct sockaddr *)&cm_id->remote_addr);
 	cm_id->remote_addr.sin_port = port;
 	return (ifa_present);
 }
 
 /*
  * Start an active connection for cm_id.
  *
  * Refuses destinations for which is_loopback_dst() is true (-ENOSYS),
  * allocates an endpoint, stashes the caller's private data behind the MPA
  * header in ep->mpa_pkt, binds the QP named by conn_param->qpn, prepares
  * the socket, checks that the route's outgoing interface is usable for
  * offload, and finally issues soconnect() with the endpoint in CONNECTING.
  *
  * Returns 0 when soconnect() was issued successfully; otherwise the
  * endpoint reference is dropped and an error is returned.
  * NOTE(review): the error values are a mix of negative (-ENOSYS, -ENOMEM)
  * and positive (EHOSTUNREACH, soconnect() result) errnos -- confirm what
  * callers expect.
  */
 int
 iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 {
 	int err = 0;
 	struct iwch_dev *h = to_iwch_dev(cm_id->device);
 	struct iwch_ep *ep;
-	struct rtentry *rt;
+	struct nhop4_extended nh4;
 	struct toedev *tdev;
 	
 	if (is_loopback_dst(cm_id)) {
 		err = -ENOSYS;
 		goto out;
 	}
 
 	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
 	if (!ep) {
 		printf("%s - cannot alloc ep.\n", __FUNCTION__);
 		err = (-ENOMEM);
 		goto out;
 	}
 	callout_init(&ep->timer, 1);
 	/* Private data is staged right after the space for the MPA header. */
 	ep->plen = conn_param->private_data_len;
 	if (ep->plen)
 		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
 		       conn_param->private_data, ep->plen);
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
 
 	/*
 	 * NOTE(review): no matching cm_id->rem_ref() is visible on the fail2
 	 * path below -- confirm whether put_ep() takes care of it.
 	 */
 	cm_id->add_ref(cm_id);
 	ep->com.cm_id = cm_id;
 	ep->com.qp = get_qhp(h, conn_param->qpn);
 	ep->com.thread = curthread;
 	PANIC_IF(!ep->com.qp);
 	CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn,
 	     ep->com.qp, cm_id);
 
 	ep->com.so = cm_id->so;
 	err = init_sock(&ep->com);
 	if (err)
 		goto fail2;
 
 	/* find a route */
-	rt = find_route(cm_id->local_addr.sin_addr.s_addr,
+	err = find_route(cm_id->local_addr.sin_addr.s_addr,
 			cm_id->remote_addr.sin_addr.s_addr,
 			cm_id->local_addr.sin_port,
-			cm_id->remote_addr.sin_port, IPTOS_LOWDELAY);
-	if (!rt) {
+			cm_id->remote_addr.sin_port, IPTOS_LOWDELAY, &nh4);
+	if (err) {
 		printf("%s - cannot find route.\n", __FUNCTION__);
 		err = EHOSTUNREACH;
 		goto fail2;
 	}
 
 	/*
 	 * NOTE(review): err is still 0 on the two failure paths below, so
 	 * the function returns success even though no connect was started
 	 * and the endpoint was released -- confirm and set an error code.
 	 */
-	if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) {
+	if (!(nh4.nh_ifp->if_flags & IFCAP_TOE)) {
 		printf("%s - interface not TOE capable.\n", __FUNCTION__);
-		RTFREE(rt);
+		fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
 		goto fail2;
 	}
 	/* tdev is only checked for NULL; it is not used afterwards. */
-	tdev = TOEDEV(rt->rt_ifp);
+	tdev = TOEDEV(nh4.nh_ifp);
 	if (tdev == NULL) {
 		printf("%s - No toedev for interface.\n", __FUNCTION__);
-		RTFREE(rt);
+		fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
 		goto fail2;
 	}
-	RTFREE(rt);
+	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
 
 	state_set(&ep->com, CONNECTING);
 	ep->com.local_addr = cm_id->local_addr;
 	ep->com.remote_addr = cm_id->remote_addr;
 	err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, 
 		ep->com.thread);
 	if (!err)
 		goto out;
 fail2:
 	put_ep(&ep->com);
 out:
 	return err;
 }
 
 int
 iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
 {
 	int err = 0;
 	struct iwch_listen_ep *ep;
 
 	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
 	if (!ep) {
 		printf("%s - cannot alloc ep.\n", __FUNCTION__);
 		err = ENOMEM;
 		goto out;
 	}
 	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
 	cm_id->add_ref(cm_id);
 	ep->com.cm_id = cm_id;
 	ep->backlog = backlog;
 	ep->com.local_addr = cm_id->local_addr;
 	ep->com.thread = curthread;
 	state_set(&ep->com, LISTEN);
 
 	ep->com.so = cm_id->so;
 	err = init_sock(&ep->com);
 	if (err)
 		goto fail;
 
 	err = solisten(ep->com.so, ep->backlog, ep->com.thread);
 	if (!err) {
 		cm_id->provider_data = ep;
 		goto out;
 	}
 	close_socket(&ep->com, 0);
 fail:
 	cm_id->rem_ref(cm_id);
 	put_ep(&ep->com);
 out:
 	return err;
 }
 
 /*
  * Tear down the listening endpoint attached to cm_id: mark it DEAD, close
  * its socket, and drop both the cm_id reference and the endpoint
  * reference.  Always returns 0.
  */
 int
 iwch_destroy_listen(struct iw_cm_id *cm_id)
 {
 	struct iwch_listen_ep *lep;
 
 	lep = to_listen_ep(cm_id);
 	CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, lep);
 
 	state_set(&lep->com, DEAD);
 	close_socket(&lep->com, 0);
 	cm_id->rem_ref(cm_id);
 	put_ep(&lep->com);
 
 	return (0);
 }
 
 /*
  * Start disconnecting an endpoint.
  *
  * abrupt != 0 requests an abortive close: the endpoint goes to ABORTING
  * and the connection is aborted.  Otherwise a graceful close is started:
  * the endpoint goes to CLOSING (or MORIBUND if it was already CLOSING) and
  * the socket is shut down.  Endpoints that are already MORIBUND, ABORTING
  * or DEAD are left alone.  The flags argument is not used.
  *
  * Always returns 0; panics on an unknown state.
  */
 int
 iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags)
 {
 	int close = 0;
 
 	/* Assert before the lock acquisition below dereferences ep. */
 	PANIC_IF(!ep);
 
 	mtx_lock(&ep->com.lock);
 
 	PANIC_IF(!ep->com.so);
 
 	CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep,
 	     ep->com.so, states[ep->com.state], abrupt);
 
 	switch (ep->com.state) {
 	case MPA_REQ_WAIT:
 	case MPA_REQ_SENT:
 	case MPA_REQ_RCVD:
 	case MPA_REP_SENT:
 	case FPDU_MODE:
 		close = 1;
 		if (abrupt)
 			ep->com.state = ABORTING;
 		else {
 			ep->com.state = CLOSING;
 			start_ep_timer(ep);
 		}
 		break;
 	case CLOSING:
 		close = 1;
 		if (abrupt) {
 			stop_ep_timer(ep);
 			ep->com.state = ABORTING;
 		} else
 			ep->com.state = MORIBUND;
 		break;
 	case MORIBUND:
 	case ABORTING:
 	case DEAD:
 		CTR3(KTR_IW_CXGB, "%s ignoring disconnect ep %p state %u\n",
 			__func__, ep, ep->com.state);
 		break;
 	default:
 		panic("unknown state: %d\n", ep->com.state);
 		break;
 	}
 
 	mtx_unlock(&ep->com.lock);
 	if (close) {
 		if (abrupt)
 			abort_connection(ep);
 		else {
 			/*
 			 * NOTE(review): this state change happens after the
 			 * endpoint lock was dropped -- confirm that is safe.
 			 */
 			if (!ep->parent_ep)
 				__state_set(&ep->com, MORIBUND);
 			shutdown_socket(&ep->com);
 		}
 	}
 	return 0;
 }
 
 static void
 process_data(struct iwch_ep *ep)
 {
 	struct sockaddr_in *local, *remote;
 
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 
 	switch (state_read(&ep->com)) {
 	case MPA_REQ_SENT:
 		process_mpa_reply(ep);
 		break;
 	case MPA_REQ_WAIT:
 
 		/*
 		 * XXX
 		 * Set local and remote addrs here because when we
 		 * dequeue the newly accepted socket, they aren't set
 		 * yet in the pcb!
 		 */
 		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
 		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
 		CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__, 
 			inet_ntoa(local->sin_addr),
 			inet_ntoa(remote->sin_addr));
 		ep->com.local_addr = *local;
 		ep->com.remote_addr = *remote;
 		free(local, M_SONAME);
 		free(remote, M_SONAME);
 		process_mpa_request(ep);
 		break;
 	default:
 		if (sbavail(&ep->com.so->so_rcv)) 
 			printf("%s Unexpected streaming data."
 			       " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n",
 			       __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state,
 			       sbavail(&ep->com.so->so_rcv), ep->com.so->so_rcv.sb_mb);
 		break;
 	}
 	return;
 }
 
 /*
  * Socket event handler for an endpoint in CONNECTING.  If the socket is
  * connected and has no pending error, the MPA request is sent.  Otherwise
  * the failure is reported through the connect reply upcall, the socket is
  * closed, the endpoint is marked DEAD and its reference is dropped.
  */
 static void
 process_connected(struct iwch_ep *ep)
 {
 	CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]);
 
 	if (!(ep->com.so->so_state & SS_ISCONNECTED) || ep->com.so->so_error) {
 		/* Connect failed. */
 		connect_reply_upcall(ep, -ep->com.so->so_error);
 		close_socket(&ep->com, 0);
 		state_set(&ep->com, DEAD);
 		put_ep(&ep->com);
 		return;
 	}
 
 	send_mpa_req(ep);
 }
 
 /*
  * Take the first socket off the listening socket's completed-connection
  * queue (head->so_comp) and prepare it for use by child_ep.
  *
  * Returns NULL if the queue is empty.  Otherwise the socket is unlinked
  * from head, soref() is called on it, iwch_so_upcall is installed as its
  * receive upcall with child_ep as the argument, SS_NBIO is set, and
  * soaccept() is called to obtain the peer address in *remote.  Panics if
  * the dequeued socket is not connected or has a pending error.
  */
 static struct socket *
 dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
 {
 	struct socket *so;
 
 	ACCEPT_LOCK();
 	so = TAILQ_FIRST(&head->so_comp);
 	if (!so) {
 		ACCEPT_UNLOCK();
 		return NULL;
 	}
 	/* Unlink from the listener while holding the accept lock. */
 	TAILQ_REMOVE(&head->so_comp, so, so_list);
 	head->so_qlen--;
 	SOCK_LOCK(so);
 	so->so_qstate &= ~SQ_COMP;
 	so->so_head = NULL;
 	soref(so);
 	soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep);
 	so->so_state |= SS_NBIO;
 	PANIC_IF(!(so->so_state & SS_ISCONNECTED));
 	PANIC_IF(so->so_error);
 	SOCK_UNLOCK(so);
 	ACCEPT_UNLOCK();
 	/* NOTE(review): the soaccept() return value is not checked. */
 	soaccept(so, (struct sockaddr **)remote);
 	return so;
 }
 
 static void
 process_newconn(struct iwch_ep *parent_ep)
 {
 	struct socket *child_so;
 	struct iwch_ep *child_ep;
 	struct sockaddr_in *remote;
 
 	CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
 	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
 	if (!child_ep) {
 		log(LOG_ERR, "%s - failed to allocate ep entry!\n",
 		       __FUNCTION__);
 		return;
 	}
 	child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
 	if (!child_so) {
 		log(LOG_ERR, "%s - failed to dequeue child socket!\n",
 		       __FUNCTION__);
 		__free_ep(&child_ep->com);
 		return;
 	}
 	CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, 
 		inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
 	child_ep->com.tdev = parent_ep->com.tdev;
 	child_ep->com.local_addr.sin_family = parent_ep->com.local_addr.sin_family;
 	child_ep->com.local_addr.sin_port = parent_ep->com.local_addr.sin_port;
 	child_ep->com.local_addr.sin_addr.s_addr = parent_ep->com.local_addr.sin_addr.s_addr;
 	child_ep->com.local_addr.sin_len = parent_ep->com.local_addr.sin_len;
 	child_ep->com.remote_addr.sin_family = remote->sin_family;
 	child_ep->com.remote_addr.sin_port = remote->sin_port;
 	child_ep->com.remote_addr.sin_addr.s_addr = remote->sin_addr.s_addr;
 	child_ep->com.remote_addr.sin_len = remote->sin_len;
 	child_ep->com.so = child_so;
 	child_ep->com.cm_id = NULL;
 	child_ep->com.thread = parent_ep->com.thread;
 	child_ep->parent_ep = parent_ep;
 
 	free(remote, M_SONAME);
 	get_ep(&parent_ep->com);
 	child_ep->parent_ep = parent_ep;
 	callout_init(&child_ep->timer, 1);
 	state_set(&child_ep->com, MPA_REQ_WAIT);
 	start_ep_timer(child_ep);
 
 	/* maybe the request has already been queued up on the socket... */
 	process_mpa_request(child_ep);
 }
 
 static int
 iwch_so_upcall(struct socket *so, void *arg, int waitflag)
 {
 	struct iwch_ep *ep = arg;
 
 	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
 	mtx_lock(&req_lock);
 	if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
 		get_ep(&ep->com);
 		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
 		taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task);
 	}
 	mtx_unlock(&req_lock);
 	return (SU_OK);
 }
 
 /*
  * Dispatch one socket event for an endpoint.  The checks are made in a
  * fixed priority order and exactly one handler runs: connect completion,
  * new incoming connection, socket error, peer close, close complete, and
  * finally received data.
  */
 static void
 process_socket_event(struct iwch_ep *ep)
 {
 	int state = state_read(&ep->com);
 	struct socket *so = ep->com.so;
 
 	CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]);
 
 	if (state == CONNECTING) {
 		process_connected(ep);
 	} else if (state == LISTEN) {
 		process_newconn(ep);
 	} else if (so->so_error) {
 		/* connection error */
 		process_conn_error(ep);
 	} else if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) &&
 	    state < CLOSING) {
 		/* peer close */
 		process_peer_close(ep);
 	} else if (so->so_state & (SS_ISDISCONNECTED)) {
 		/* close complete */
 		process_close_complete(ep);
 	} else {
 		/* rx data */
 		process_data(ep);
 	}
 }
 
 /*
  * Task handler: drain req_list.  Each queued endpoint is unlinked under
  * req_lock, processed with the lock dropped, and then has the reference
  * taken by iwch_so_upcall() released.
  */
 static void
 process_req(void *ctx, int pending)
 {
 	struct iwch_ep_common *epc;
 
 	CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__);
 
 	mtx_lock(&req_lock);
 	while ((epc = TAILQ_FIRST(&req_list)) != NULL) {
 		TAILQ_REMOVE(&req_list, epc, entry);
 		/* Mark as not queued so the upcall may enqueue it again. */
 		epc->entry.tqe_prev = NULL;
 		mtx_unlock(&req_lock);
 
 		if (epc->so != NULL)
 			process_socket_event((struct iwch_ep *)epc);
 		put_ep(epc);
 
 		mtx_lock(&req_lock);
 	}
 	mtx_unlock(&req_lock);
 }
 
 /*
  * Initialise the connection manager: the request list and its lock, and
  * the taskqueue (with one thread) that runs process_req().
  *
  * Returns 0 on success or ENOMEM if the taskqueue cannot be created, in
  * which case the request lock is destroyed again so nothing is left
  * initialised.
  */
 int
 iwch_cm_init(void)
 {
 	TAILQ_INIT(&req_list);
 	mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF);
 	iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT,
 		taskqueue_thread_enqueue, &iw_cxgb_taskq);
 	if (iw_cxgb_taskq == NULL) {
 		printf("failed to allocate iw_cxgb taskqueue\n");
 		/* Undo mtx_init() so a failed init leaves no live mutex. */
 		mtx_destroy(&req_lock);
 		return (ENOMEM);
 	}
 	taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq");
 	TASK_INIT(&iw_cxgb_task, 0, process_req, NULL);
 	return (0);
 }
 
 void
 iwch_cm_term(void)
 {
 
 	taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task);
 	taskqueue_free(iw_cxgb_taskq);
 }
 
 /*
  * Register this module's handlers for the CPL_RDMA_EC_STATUS and
  * CPL_RDMA_TERMINATE messages on adapter sc.
  */
 void
 iwch_cm_init_cpl(struct adapter *sc)
 {
 
 	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, ec_status);
 	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
 }
 
 /*
  * Clear the CPL_RDMA_EC_STATUS and CPL_RDMA_TERMINATE handlers on adapter
  * sc by registering NULL for both.
  */
 void
 iwch_cm_term_cpl(struct adapter *sc)
 {
 
 	t3_register_cpl_handler(sc, CPL_RDMA_EC_STATUS, NULL);
 	t3_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
 }
 #endif
Index: projects/release-pkg/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
===================================================================
--- projects/release-pkg/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	(revision 293335)
+++ projects/release-pkg/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	(revision 293336)
@@ -1,1813 +1,1812 @@
 /*-
  * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 
 #ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/sockstate.h>
 #include <sys/sockopt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockbuf.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/protosw.h>
 #include <sys/priv.h>
 #include <sys/sglist.h>
 #include <sys/taskqueue.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/ethernet.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 
 #include <netinet/ip.h>
 #include <netinet/tcp_var.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/toecore.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <net/route.h>
 
 #include "cxgb_include.h"
 #include "ulp/tom/cxgb_l2t.h"
 #include "ulp/tom/cxgb_tom.h"
 #include "ulp/tom/cxgb_toepcb.h"
 
 VNET_DECLARE(int, tcp_do_autosndbuf);
 #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
 VNET_DECLARE(int, tcp_autosndbuf_inc);
 #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
 VNET_DECLARE(int, tcp_autosndbuf_max);
 #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
 VNET_DECLARE(int, tcp_do_autorcvbuf);
 #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
 VNET_DECLARE(int, tcp_autorcvbuf_inc);
 #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
 VNET_DECLARE(int, tcp_autorcvbuf_max);
 #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
 extern int always_keepalive;
 
 /*
  * For ULP connections HW may add headers, e.g., for digests, that aren't part
  * of the messages sent by the host but that are part of the TCP payload and
  * therefore consume TCP sequence space.  Tx connection parameters that
  * operate in TCP sequence space are affected by the HW additions and need to
  * compensate for them to accurately track TCP sequence numbers. This array
  * contains the compensating extra lengths for ULP packets.  It is indexed by
  * a packet's ULP submode.
  */
 const unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8};
 
 /*
  * Max receive window supported by HW in bytes.  Only a small part of it can
  * be set through option0, the rest needs to be set through RX_DATA_ACK.
  */
 #define MAX_RCV_WND ((1U << 27) - 1)
 
 /*
  * Min receive window.  We want it to be large enough to accommodate receive
  * coalescing, handle jumbo frames, and not trigger sender SWS avoidance.
  */
 #define MIN_RCV_WND (24 * 1024U)
 #define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS)
 
 static void t3_release_offload_resources(struct toepcb *);
 static void send_reset(struct toepcb *toep);
 
 /*
  * Final release of a toepcb, called after its last CPL has been received.
  *
  * The inp must be write-locked on entry.  By the time this returns the inp
  * has been unlocked, or freed if the reference dropped here was the last
  * one.  The return value is that of in_pcbrele_wlocked(): non-zero means
  * the inp is gone and must not be touched.
  */
 static int
 toepcb_release(struct toepcb *toep)
 {
 	struct inpcb *inp = toep->tp_inp;
 	struct toedev *tod = toep->tp_tod;
 	struct tom_data *td = t3_tomdata(tod);
 	int freed;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(!(toep->tp_flags & TP_CPL_DONE),
 	    ("%s: double release?", __func__));
 
 	CTR2(KTR_CXGB, "%s: tid %d", __func__, toep->tp_tid);
 
 	/* Cut the toepcb -> inp link and record that CPL traffic is over. */
 	toep->tp_inp = NULL;
 	toep->tp_flags |= TP_CPL_DONE;
 
 	mtx_lock(&td->toep_list_lock);
 	TAILQ_REMOVE(&td->toep_list, toep, link);
 	mtx_unlock(&td->toep_list_lock);
 
 	/* The tcpcb side has already detached, so free the resources here. */
 	if ((toep->tp_flags & TP_ATTACHED) == 0)
 		t3_release_offload_resources(toep);
 
 	freed = in_pcbrele_wlocked(inp);
 	if (freed == 0)
 		INP_WUNLOCK(inp);
 
 	return (freed);
 }
 
 /*
  * Detach from the tcpcb side only.  The tcpcb is going away, so the toepcb
  * hanging off it is unhooked.  If the CPL side has already finished with
  * the toepcb (TP_CPL_DONE) the offload resources are released here as well.
  *
  * The inp must be write-locked by the caller.
  */
 static void
 toepcb_detach(struct inpcb *inp)
 {
 	struct tcpcb *tp;
 	struct toepcb *toep;
 
 	KASSERT(inp, ("%s: inp is NULL", __func__));
 	INP_WLOCK_ASSERT(inp);
 
 	tp = intotcpcb(inp);
 	toep = tp->t_toe;
 
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 	KASSERT(toep->tp_flags & TP_ATTACHED, ("%s: not attached", __func__));
 
 	CTR6(KTR_CXGB, "%s: %s %u, toep %p, inp %p, tp %p", __func__,
 	    tp->t_state == TCPS_SYN_SENT ? "atid" : "tid", toep->tp_tid,
 	    toep, inp, tp);
 
 	/* Break the link in both directions. */
 	toep->tp_flags &= ~TP_ATTACHED;
 	tp->t_flags &= ~TF_TOE;
 	tp->t_toe = NULL;
 
 	if ((toep->tp_flags & TP_CPL_DONE) != 0)
 		t3_release_offload_resources(toep);
 }
 
 /*
  * Entry point taking a toedev and a tcpcb; the toedev is unused.  Detaches
  * the toepcb from the tcpcb's inp.
  */
 void
 t3_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
 {
 	struct inpcb *inp = tp->t_inpcb;
 
 	toepcb_detach(inp);
 }
 
 static int
 alloc_atid(struct tid_info *t, void *ctx)
 {
 	int atid = -1;
 
 	mtx_lock(&t->atid_lock);
 	if (t->afree) {
 		union active_open_entry *p = t->afree;
 
 		atid = (p - t->atid_tab) + t->atid_base;
 		t->afree = p->next;
 		p->ctx = ctx;
 		t->atids_in_use++;
 	}
 	mtx_unlock(&t->atid_lock);
 
 	return (atid);
 }
 
 /*
  * Return an active-open id to the head of the free list.
  */
 static void
 free_atid(struct tid_info *t, int atid)
 {
 	union active_open_entry *entry;
 
 	entry = atid2entry(t, atid);
 
 	mtx_lock(&t->atid_lock);
 	entry->next = t->afree;
 	t->afree = entry;
 	t->atids_in_use--;
 	mtx_unlock(&t->atid_lock);
 }
 
 /*
  * Record ctx as the owner of tid and bump the in-use count atomically.
  */
 void
 insert_tid(struct tom_data *td, void *ctx, unsigned int tid)
 {
 	struct tid_info *tinfo = &td->tid_maps;
 
 	tinfo->tid_tab[tid] = ctx;
 	atomic_add_int(&tinfo->tids_in_use, 1);
 }
 
 /*
  * Replace the context stored for tid.  The in-use count is not changed.
  */
 void
 update_tid(struct tom_data *td, void *ctx, unsigned int tid)
 {
 	struct tid_info *tinfo = &td->tid_maps;
 
 	tinfo->tid_tab[tid] = ctx;
 }
 
 /*
  * Clear the context stored for tid and drop the in-use count atomically.
  */
 void
 remove_tid(struct tom_data *td, unsigned int tid)
 {
 	struct tid_info *tinfo = &td->tid_maps;
 
 	tinfo->tid_tab[tid] = NULL;
 	atomic_add_int(&tinfo->tids_in_use, -1);
 }
 
 /* use ctx as a next pointer in the tid release list */
 void
 queue_tid_release(struct toedev *tod, unsigned int tid)
 {
 	struct tom_data *td = t3_tomdata(tod);
 	void **p = &td->tid_maps.tid_tab[tid];
 	struct adapter *sc = tod->tod_softc;
 
 	mtx_lock(&td->tid_release_lock);
 	*p = td->tid_release_list;
 	td->tid_release_list = p;
 	if (!*p)
 		taskqueue_enqueue(sc->tq, &td->tid_release_task);
 	mtx_unlock(&td->tid_release_lock);
 }
 
 /*
  * Fill in a TID_RELEASE work request for the given tid.  Only the WR opcode
  * word and the CPL opcode/tid word are written.
  */
 static inline void
 mk_tid_release(struct cpl_tid_release *cpl, unsigned int tid)
 {
 	cpl->wr.wrh_hi = htobe32(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
 }
 
 void
 release_tid(struct toedev *tod, unsigned int tid, int qset)
 {
 	struct tom_data *td = t3_tomdata(tod);
 	struct adapter *sc = tod->tod_softc;
 	struct mbuf *m;
 	struct cpl_tid_release *cpl;
 #ifdef INVARIANTS
 	struct tid_info *t = &td->tid_maps;
 #endif
 
 	KASSERT(tid >= 0 && tid < t->ntids,
 	    ("%s: tid=%d, ntids=%d", __func__, tid, t->ntids));
 
 	m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl);
 	if (m) {
 		mk_tid_release(cpl, tid);
 		t3_offload_tx(sc, m);
 		remove_tid(td, tid);
 	} else
 		queue_tid_release(tod, tid);
 
 }
 
 /*
  * Task handler that works through the deferred tid release list built by
  * queue_tid_release().  For each entry a TID_RELEASE message is sent on
  * queue set 0 and the tid is removed from the tid table.
  *
  * 'data' is the tom_data; 'pending' is unused.
  */
 void
 t3_process_tid_release_list(void *data, int pending)
 {
 	struct mbuf *m;
 	struct tom_data *td = data;
 	struct adapter *sc = td->tod.tod_softc;
 
 	mtx_lock(&td->tid_release_lock);
 	while (td->tid_release_list) {
 		void **p = td->tid_release_list;
 		/* The list links are tid_tab slots, so the index is the tid. */
 		unsigned int tid = p - td->tid_maps.tid_tab;
 		struct cpl_tid_release *cpl;
 
 		/*
 		 * NOTE(review): the entry is unlinked before the allocation
 		 * below.  If the allocation fails, this tid is no longer on
 		 * the list and no TID_RELEASE is sent for it here.
 		 */
 		td->tid_release_list = (void **)*p;
 		m = M_GETHDR_OFLD(0, CPL_PRIORITY_CONTROL, cpl); /* qs 0 here */
 		if (m == NULL)
 			break;	/* XXX: who reschedules the release task? */
 		/* Send with the list lock dropped, then retake it to go on. */
 		mtx_unlock(&td->tid_release_lock);
 		mk_tid_release(cpl, tid);
 		t3_offload_tx(sc, m);
 		remove_tid(td, tid);
 		mtx_lock(&td->tid_release_lock);
 	}
 	mtx_unlock(&td->tid_release_lock);
 }
 
 /*
  * Send a CLOSE_CON_REQ for the connection, at most once: TP_FIN_SENT
  * records that the request has been issued and makes later calls no-ops.
  */
 static void
 close_conn(struct adapter *sc, struct toepcb *toep)
 {
 	struct cpl_close_con_req *req;
 	struct mbuf *m;
 
 	if ((toep->tp_flags & TP_FIN_SENT) != 0)
 		return;
 
 	m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req);
 	if (m == NULL)
 		CXGB_UNIMPLEMENTED();
 
 	/* Build the request: WR header, CPL opcode/tid, reserved word. */
 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
 	req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, toep->tp_tid));
 	req->rsvd = 0;
 
 	toep->tp_flags |= TP_FIN_SENT;
 	t3_offload_tx(sc, m);
 }
 
 static inline void
 make_tx_data_wr(struct socket *so, struct tx_data_wr *req, int len,
     struct mbuf *tail)
 {
 	struct tcpcb *tp = so_sototcpcb(so);
 	struct toepcb *toep = tp->t_toe;
 	struct sockbuf *snd;
 
 	inp_lock_assert(tp->t_inpcb);
 	snd = so_sockbuf_snd(so);
 
 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
 	req->wr.wrh_lo = htonl(V_WR_TID(toep->tp_tid));
 	/* len includes the length of any HW ULP additions */
 	req->len = htonl(len);
 	req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
 	/* V_TX_ULP_SUBMODE sets both the mode and submode */
 	req->flags = htonl(V_TX_ULP_SUBMODE(toep->tp_ulp_mode) | V_TX_URG(0) |
 	    V_TX_SHOVE(!(tp->t_flags & TF_MORETOCOME) && (tail ? 0 : 1)));
 	req->sndseq = htonl(tp->snd_nxt);
 	if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
 		struct adapter *sc = toep->tp_tod->tod_softc;
 		int cpu_idx = sc->rrss_map[toep->tp_qset];
 
 		req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
 		    V_TX_CPU_IDX(cpu_idx));
 
 		/* Sendbuffer is in units of 32KB. */
 		if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE) 
 			req->param |= htonl(V_TX_SNDBUF(VNET(tcp_autosndbuf_max) >> 15));
 		else
 			req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
 
 		toep->tp_flags |= TP_DATASENT;
 	}
 }
 
 /*
  * TOM_XXX_DUPLICATION sgl_len, calc_tx_descs, calc_tx_descs_ofld, mbuf_wrs, etc.
  * TOM_XXX_MOVE to some common header file.
  */
 /*
  * IMM_LEN: # of bytes that can be tx'd as immediate data.  There are 16 flits
  * in a tx desc; subtract 3 for tx_data_wr (including the WR header), and 1 more
  * for the second gen bit flit.  This leaves us with 12 flits.
  *
  * descs_to_sgllen: # of SGL entries that can fit into the given # of tx descs.
  * The first desc has a tx_data_wr (which includes the WR header), the rest have
  * the WR header only.  All descs have the second gen bit flit.
  *
  * sgllen_to_descs: # of tx descs used up by an sgl of given length.  The first
  * desc has a tx_data_wr (which includes the WR header), the rest have the WR
  * header only.  All descs have the second gen bit flit.
  *
  * flits_to_sgllen: # of SGL entries that can be fit in the given # of flits.
  *
  */
 /* Max bytes of immediate data in one WR: 12 flits at 8 bytes each. */
 #define IMM_LEN 96
 /* Indexed by number of tx descriptors (0 .. TX_MAX_DESC). */
 static int descs_to_sgllen[TX_MAX_DESC + 1] = {0, 8, 17, 26, 35};
 /* Indexed by number of SGL segments; inverse of descs_to_sgllen. */
 static int sgllen_to_descs[TX_MAX_SEGS] = {
 	0, 1, 1, 1, 1, 1, 1, 1, 1, 2,	/*  0 -  9 */
 	2, 2, 2, 2, 2, 2, 2, 2, 3, 3,	/* 10 - 19 */
 	3, 3, 3, 3, 3, 3, 3, 4, 4, 4,	/* 20 - 29 */
 	4, 4, 4, 4, 4, 4		/* 30 - 35 */
 };
 /* Compiled out: flits -> SGL entries table, kept for reference. */
 #if 0
 static int flits_to_sgllen[TX_DESC_FLITS + 1] = {
 	0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10, 10
 };
 #endif
 /* The tables above account for a second generation-bit flit per desc. */
 #if SGE_NUM_GENBITS != 2
 #error "SGE_NUM_GENBITS really must be 2"
 #endif
 
 /*
  * Hand unsent data in the socket's send buffer to the hardware, one work
  * request (WR) at a time, for as long as there is data and WR credit.
  *
  * Each WR is built in a freshly allocated header mbuf (m0).  Small payloads
  * are copied in as immediate data (whole mbufs, up to IMM_LEN bytes);
  * anything else is described by a scatter/gather list.  If the send buffer
  * is fully drained and TP_SEND_FIN is set, the connection close is sent too.
  *
  * so:             socket to transmit from; its inp must be locked.
  * req_completion: non-zero to request a completion on the first WR after
  *                 the unacked count was zero.  A completion is always
  *                 requested once half of the WR credits are unacked.
  *
  * Returns the number of payload bytes passed to the hardware by this call.
  */
 int
 t3_push_frames(struct socket *so, int req_completion)
 {
 	struct tcpcb *tp = so_sototcpcb(so);
 	struct toepcb *toep = tp->t_toe;
 	struct mbuf *m0, *sndptr, *m;
 	struct toedev *tod = toep->tp_tod;
 	struct adapter *sc = tod->tod_softc;
 	int bytes, ndesc, total_bytes = 0, mlen;
 	struct sockbuf *snd;
 	struct sglist *sgl;
 	struct ofld_hdr *oh;
 	caddr_t dst;
 	struct tx_data_wr *wr;
 
 	inp_lock_assert(tp->t_inpcb);
 
 	snd = so_sockbuf_snd(so);
 	SOCKBUF_LOCK(snd);
 
 	/*
 	 * Autosize the send buffer.
 	 */
 	if (snd->sb_flags & SB_AUTOSIZE && VNET(tcp_do_autosndbuf)) {
 		if (sbused(snd) >= (snd->sb_hiwat / 8 * 7) &&
 		    sbused(snd) < VNET(tcp_autosndbuf_max)) {
 			if (!sbreserve_locked(snd, min(snd->sb_hiwat +
 			    VNET(tcp_autosndbuf_inc), VNET(tcp_autosndbuf_max)),
 			    so, curthread))
 				snd->sb_flags &= ~SB_AUTOSIZE;
 		}
 	}
 
 	/*
 	 * Find the first unsent mbuf.  If the previous call consumed the whole
 	 * chain, sb_sndptr was left on the last mbuf sent (tp_m_last) and the
 	 * new data starts right after it.
 	 */
 	if (toep->tp_m_last && toep->tp_m_last == snd->sb_sndptr)
 		sndptr = toep->tp_m_last->m_next;
 	else
 		sndptr = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;
 
 	/* Nothing to send or no WRs available for sending data */
 	if (toep->tp_wr_avail == 0 || sndptr == NULL)
 		goto out;
 
 	/* Something to send and at least 1 WR available */
 	while (toep->tp_wr_avail && sndptr != NULL) {
 
 		m0 = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m0 == NULL)
 			break;
 		/* Layout inside m0: ofld_hdr, tx_data_wr, immediate data. */
 		oh = mtod(m0, struct ofld_hdr *);
 		wr = (void *)(oh + 1);
 		dst = (void *)(wr + 1);
 
 		m0->m_pkthdr.len = m0->m_len = sizeof(*oh) + sizeof(*wr);
 		oh->flags = V_HDR_CTRL(CPL_PRIORITY_DATA) | F_HDR_DF |
 		    V_HDR_QSET(toep->tp_qset);
 
 		/*
 		 * Try to construct an immediate data WR if possible.  Stuff as
 		 * much data into it as possible, one whole mbuf at a time.
 		 */
 		mlen = sndptr->m_len;
 		ndesc = bytes = 0;
 		while (mlen <= IMM_LEN - bytes) {
 			bcopy(sndptr->m_data, dst, mlen);
 			bytes += mlen;
 			dst += mlen;
 
 			if (!(sndptr = sndptr->m_next))
 				break;
 			mlen = sndptr->m_len;
 		}
 
 		if (bytes) {
 
 			/* Was able to fit 'bytes' bytes in an immediate WR */
 
 			ndesc = 1;
 			make_tx_data_wr(so, wr, bytes, sndptr);
 
 			m0->m_len += bytes;
 			m0->m_pkthdr.len = m0->m_len;
 
 		} else {
 			int wr_avail = min(toep->tp_wr_avail, TX_MAX_DESC);
 
 			/* Need to make an SGL */
 
 			sgl = sglist_alloc(descs_to_sgllen[wr_avail], M_NOWAIT);
 			if (sgl == NULL) {
 				/* m0 was never queued; free it, don't leak it. */
 				m_freem(m0);
 				break;
 			}
 
 			/* Add mbufs until the chain ends or the SGL is full. */
 			for (m = sndptr; m != NULL; m = m->m_next) {
 				if ((mlen = m->m_len) > 0) {
 					if (sglist_append(sgl, m->m_data, mlen))
 					    break;
 				}
 				bytes += mlen;
 			}
 			sndptr = m;
 			if (bytes == 0) {
 				sglist_free(sgl);
 				/* Nothing to send in this WR; release m0 too. */
 				m_freem(m0);
 				break;
 			}
 			ndesc = sgllen_to_descs[sgl->sg_nseg];
 			oh->flags |= F_HDR_SGL;
 			oh->sgl = sgl;
 			make_tx_data_wr(so, wr, bytes, sndptr);
 		}
 
 		oh->flags |= V_HDR_NDESC(ndesc);
 		oh->plen = bytes;
 
 		/*
 		 * Advance the send pointer.  When the chain is exhausted it is
 		 * parked on the last mbuf (with the offset pulled back by that
 		 * mbuf's length) and remembered in tp_m_last for the next call.
 		 */
 		snd->sb_sndptr = sndptr;
 		snd->sb_sndptroff += bytes;
 		if (sndptr == NULL) {
 			snd->sb_sndptr = snd->sb_mbtail;
 			snd->sb_sndptroff -= snd->sb_mbtail->m_len;
 			toep->tp_m_last = snd->sb_mbtail;
 		} else
 			toep->tp_m_last = NULL;
 
 		total_bytes += bytes;
 
 		toep->tp_wr_avail -= ndesc;
 		toep->tp_wr_unacked += ndesc;
 
 		if ((req_completion && toep->tp_wr_unacked == ndesc) ||
 		    toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
 			wr->wr.wrh_hi |= htonl(F_WR_COMPL);
 			toep->tp_wr_unacked = 0;
 		}
 
 		enqueue_wr(toep, m0);
 		l2t_send(sc, m0, toep->tp_l2t);
 	}
 out:
 	SOCKBUF_UNLOCK(snd);
 
 	/* Everything has gone out and a FIN was requested: send it now. */
 	if (sndptr == NULL && (toep->tp_flags & TP_SEND_FIN))
 		close_conn(sc, toep);
 
 	return (total_bytes);
 }
 
 /*
  * Return 'credits' bytes of receive window to the hardware in an
  * RX_DATA_ACK message, which also sets delayed-ACK mode 1.
  *
  * Returns the number of credits sent: 'credits', or 0 if no mbuf could be
  * allocated for the message.
  */
 static int
 send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
 {
 	uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
 	struct cpl_rx_data_ack *ack;
 	struct mbuf *m;
 
 	m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_CONTROL, ack);
 	if (m == NULL)
 		return (0);
 
 	ack->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	ack->wr.wrh_lo = 0;
 	OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
 	ack->credit_dack = htonl(dack | V_RX_CREDITS(credits));
 
 	t3_offload_tx(sc, m);
 	return (credits);
 }
 
 /*
  * Account for data removed from the receive buffer since the last call.
  * Bytes that have left the buffer become rx credits.  They are returned to
  * the hardware once they reach 15KB, or earlier if credits plus 16KB would
  * cover the current receive window.
  *
  * The inp must be write-locked.
  */
 void
 t3_rcvd(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct inpcb *inp = tp->t_inpcb;
 	struct toepcb *toep = tp->t_toe;
 	struct socket *so = inp->inp_socket;
 	struct sockbuf *so_rcv = &so->so_rcv;
 	int credits;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/* Whatever is no longer in the sockbuf has been consumed. */
 	SOCKBUF_LOCK(so_rcv);
 	KASSERT(toep->tp_enqueued >= sbused(so_rcv),
 	    ("%s: sbused(so_rcv) > enqueued", __func__));
 	toep->tp_rx_credits += toep->tp_enqueued - sbused(so_rcv);
 	toep->tp_enqueued = sbused(so_rcv);
 	SOCKBUF_UNLOCK(so_rcv);
 
 	/* Not enough accumulated yet, and the window is not at risk. */
 	if (!(toep->tp_rx_credits + 16384 >= tp->rcv_wnd) &&
 	    !(toep->tp_rx_credits >= 15 * 1024))
 		return;
 
 	credits = send_rx_credits(sc, toep, toep->tp_rx_credits);
 	toep->tp_rx_credits -= credits;
 	tp->rcv_wnd += credits;
 	tp->rcv_adv += credits;
 }
 
 /*
  * Handler for RX_URG_NOTIFY CPL messages.  The notification is not acted
  * on: it is logged and the message is freed.  Always returns 0.
  */
 static int
 do_rx_urg_notify(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct cpl_rx_urg_notify *hdr = mtod(m, void *);
 	unsigned int tid = GET_TID(hdr);
 	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
 
 	/* End the record with a newline, like the file's other log() calls. */
 	log(LOG_ERR, "%s: tid %u inp %p\n", __func__, tid, toep->tp_inp);
 
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Request that a FIN be sent.  TP_SEND_FIN is set on the toepcb and the
  * send path is run; t3_push_frames() issues the close once the send buffer
  * has been drained.  Always returns 0.
  *
  * The tcbinfo read lock and the inp write lock must be held.
  */
 int
 t3_send_fin(struct toedev *tod, struct tcpcb *tp)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	struct toepcb *toep = tp->t_toe;
 	struct socket *so = inp_inpcbtosocket(inp);
 #if defined(KTR)
 	unsigned int tid = toep->tp_tid;
 #endif
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep,
 	    toep->tp_flags);
 
 	toep->tp_flags |= TP_SEND_FIN;
 	(void)t3_push_frames(so, 1);
 
 	return (0);
 }
 
 /*
  * Push pending send-buffer data for tp's socket to the hardware, asking
  * for a completion.  Always returns 0.
  */
 int
 t3_tod_output(struct toedev *tod, struct tcpcb *tp)
 {
 	struct socket *so = tp->t_inpcb->inp_socket;
 
 	(void)t3_push_frames(so, 1);
 	return (0);
 }
 
 /*
  * Pick the index into the adapter's MTU table for a connection.
  *
  * The MSS comes from tcp_mssopt(inc) when inc is given, otherwise from
  * pmss.  A positive pmss also caps the result.  The returned index is the
  * last one whose successor does not exceed MSS + 40; index 0 is returned
  * if even mtus[1] is too large.
  */
 int
 find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
 {
 	unsigned short *mtu_tab = &sc->params.mtus[0];
 	int idx, mss;
 
 	KASSERT(inc != NULL || pmss > 0,
 	    ("%s: at least one of inc/pmss must be specified", __func__));
 
 	if (inc)
 		mss = tcp_mssopt(inc);
 	else
 		mss = pmss;
 	if (pmss > 0 && pmss < mss)
 		mss = pmss;
 
 	/* Stop at the first entry whose successor is too big. */
 	for (idx = 0; idx < NMTUS - 1; idx++) {
 		if (mtu_tab[idx + 1] > mss + 40)
 			break;
 	}
 
 	return (idx);
 }
 
 /*
  * Free every work request still on the toepcb's wr_list, including the
  * scatter/gather list attached to any WR marked F_HDR_SGL.
  */
 static inline void
 purge_wr_queue(struct toepcb *toep)
 {
 	struct ofld_hdr *oh;
 	struct mbuf *wr;
 
 	for (;;) {
 		wr = mbufq_dequeue(&toep->wr_list);
 		if (wr == NULL)
 			break;
 
 		oh = mtod(wr, struct ofld_hdr *);
 		if ((oh->flags & F_HDR_SGL) != 0)
 			sglist_free(oh->sgl);
 		m_freem(wr);
 	}
 }
 
 /*
  * Give back everything cxgb(4)/T3 holds for an offloaded connection: any
  * queued work requests, the L2T entry and the TID.  The toepcb itself is
  * freed last.
  *
  * The toepcb must already be disconnected from its inp; a non-NULL tp_inp
  * here is a panic.
  */
 static void
 t3_release_offload_resources(struct toepcb *toep)
 {
 	struct toedev *tod = toep->tp_tod;
 	struct tom_data *td = t3_tomdata(tod);
 
 	/*
 	 * The TOM explicitly detaches its toepcb from the system's inp before
 	 * it releases the offload resources.
 	 */
 	if (toep->tp_inp != NULL) {
 		panic("%s: inp %p still attached to toepcb %p",
 		    __func__, toep->tp_inp, toep);
 	}
 
 	/* Fewer credits available than the max means WRs are queued. */
 	if (toep->tp_wr_max != toep->tp_wr_avail)
 		purge_wr_queue(toep);
 
 	if (toep->tp_l2t != NULL) {
 		l2t_release(td->l2t, toep->tp_l2t);
 		toep->tp_l2t = NULL;
 	}
 
 	/* A negative tid means there is no hardware tid to release. */
 	if (toep->tp_tid >= 0)
 		release_tid(tod, toep->tp_tid, toep->tp_qset);
 
 	toepcb_free(toep);
 }
 
 /*
  * Choose the receive window for a socket: the free space in its receive
  * buffer, raised to at least MIN_RCV_WND and capped at MAX_RCV_WND.
  *
  * The receive sockbuf lock must be held.
  */
 unsigned long
 select_rcv_wnd(struct socket *so)
 {
 	unsigned long wnd;
 
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	wnd = sbspace(&so->so_rcv);
 	if (wnd < MIN_RCV_WND) {
 		/* Never go below the floor. */
 		wnd = MIN_RCV_WND;
 	}
 
 	return min(wnd, MAX_RCV_WND);
 }
 
 /*
  * Compute the receive window scale: the smallest shift, up to
  * TCP_MAX_WINSHIFT, for which TCP_MAXWIN << shift is at least
  * min(sb_max, MAX_RCV_WND).
  */
 int
 select_rcv_wscale(void)
 {
 	unsigned long space = sb_max;
 	int wscale;
 
 	if (space > MAX_RCV_WND)
 		space = MAX_RCV_WND;
 
 	for (wscale = 0; wscale < TCP_MAX_WINSHIFT; wscale++) {
 		if ((TCP_MAXWIN << wscale) >= space)
 			break;
 	}
 
 	return (wscale);
 }
 
 
 /*
  * Wire a socket up for TCP offload: mark both sockbufs SB_NOCOALESCE, link
  * the tcpcb and the toepcb to each other, take a reference on the inp for
  * the toepcb, and put the toepcb on the TOM's list.
  *
  * The inp must be write-locked.
  */
 void
 offload_socket(struct socket *so, struct toepcb *toep)
 {
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp = intotcpcb(inp);
 	struct toedev *tod = toep->tp_tod;
 	struct tom_data *td = t3_tomdata(tod);
 
 	INP_WLOCK_ASSERT(inp);
 
 	/* Socket buffers */
 	SOCKBUF_LOCK(&so->so_snd);
 	so_sockbuf_snd(so)->sb_flags |= SB_NOCOALESCE;
 	SOCKBUF_UNLOCK(&so->so_snd);
 	SOCKBUF_LOCK(&so->so_rcv);
 	so_sockbuf_rcv(so)->sb_flags |= SB_NOCOALESCE;
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/* tcpcb -> toepcb */
 	tp->tod = tod;
 	tp->t_toe = toep;
 	tp->t_flags |= TF_TOE;
 
 	/* toepcb -> inp, backed by an extra hold on the inp */
 	toep->tp_inp = inp;
 	toep->tp_flags |= TP_ATTACHED;
 	in_pcbref(inp);
 
 	/* Make the toepcb visible on the active list */
 	mtx_lock(&td->toep_list_lock);
 	TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
 	mtx_unlock(&td->toep_list_lock);
 }
 
 /*
  * Reverse offload_socket().  This is _not_ the normal way to "unoffload" a
  * socket.  Dropping the inp reference taken by offload_socket() must not
  * free the inp; if it does, this panics.
  *
  * The inp must be write-locked.
  */
 void
 undo_offload_socket(struct socket *so)
 {
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp = intotcpcb(inp);
 	struct toepcb *toep = tp->t_toe;
 	struct toedev *tod = toep->tp_tod;
 	struct tom_data *td = t3_tomdata(tod);
 
 	INP_WLOCK_ASSERT(inp);
 
 	/* Socket buffers */
 	so_sockbuf_snd(so)->sb_flags &= ~SB_NOCOALESCE;
 	so_sockbuf_rcv(so)->sb_flags &= ~SB_NOCOALESCE;
 
 	/* tcpcb no longer refers to the TOE */
 	tp->t_flags &= ~TF_TOE;
 	tp->t_toe = NULL;
 	tp->tod = NULL;
 
 	/* toepcb no longer refers to the inp */
 	toep->tp_flags &= ~TP_ATTACHED;
 	toep->tp_inp = NULL;
 	if (in_pcbrele_wlocked(inp))
 		panic("%s: inp freed.", __func__);
 
 	mtx_lock(&td->toep_list_lock);
 	TAILQ_REMOVE(&td->toep_list, toep, link);
 	mtx_unlock(&td->toep_list_lock);
 }
 
 /*
  * Build the high word of option 0, returned in big-endian byte order.
  *
  * 'so' may be NULL (it could be a listening socket, and there may be no
  * toepcb at all at this time); the Nagle and keepalive bits are then left
  * clear.  'e' may be NULL too, in which case no L2T index or tx channel is
  * encoded.
  */
 uint32_t
 calc_opt0h(struct socket *so, int mtu_idx, int rscale, struct l2t_entry *e)
 {
 	uint32_t opt0h;
 
 	opt0h = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx);
 
 	if (so != NULL) {
 		struct inpcb *inp = sotoinpcb(so);
 		struct tcpcb *tp = intotcpcb(inp);
 		int keepalive = always_keepalive ||
 		    so_options_get(so) & SO_KEEPALIVE;
 		int nagle = (tp->t_flags & TF_NODELAY) == 0;
 
 		opt0h |= V_NAGLE(nagle);
 		opt0h |= V_KEEP_ALIVE(keepalive != 0);
 	}
 
 	if (e != NULL)
 		opt0h |= V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx);
 
 	return (htobe32(opt0h));
 }
 
 /*
  * Build the low word of option 0, returned in big-endian byte order: ULP
  * mode "none", the receive buffer size, and the IP TOS when a socket is
  * supplied.
  */
 uint32_t
 calc_opt0l(struct socket *so, int rcv_bufsize)
 {
 	uint32_t opt0l;
 
 	KASSERT(rcv_bufsize <= M_RCV_BUFSIZ,
 	    ("%s: rcv_bufsize (%d) is too high", __func__, rcv_bufsize));
 
 	opt0l = V_ULP_MODE(ULP_MODE_NONE) | V_RCV_BUFSIZ(rcv_bufsize);
 
 	/* The socket is optional because no one cares about IP TOS. */
 	if (so != NULL)
 		opt0l |= V_TOS(INP_TOS(sotoinpcb(so)));
 
 	return (htobe32(opt0l));
 }
 
 /*
  * Map the status of an ACT_OPEN_RPL to an errno.  Statuses without an
  * explicit mapping become EIO.  A "connection exists" status is also
  * logged.
  */
 static int
 act_open_rpl_status_to_errno(int status)
 {
 	int error;
 
 	switch (status) {
 	case CPL_ERR_CONN_RESET:
 		error = ECONNREFUSED;
 		break;
 	case CPL_ERR_ARP_MISS:
 		error = EHOSTUNREACH;
 		break;
 	case CPL_ERR_CONN_TIMEDOUT:
 		error = ETIMEDOUT;
 		break;
 	case CPL_ERR_CONN_EXIST:
 		log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n");
 		/* FALLTHROUGH */
 	case CPL_ERR_TCAM_FULL:
 		error = EAGAIN;
 		break;
 	default:
 		error = EIO;
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Tell whether a failed active open has allocated a TID.  It has unless
  * the failure status is TCAM full, connection exists, or ARP miss.
  * Returns 1 if a TID was allocated, 0 otherwise.
  */
 static inline int
 act_open_has_tid(int status)
 {
 	switch (status) {
 	case CPL_ERR_TCAM_FULL:
 	case CPL_ERR_CONN_EXIST:
 	case CPL_ERR_ARP_MISS:
 		return (0);
 	default:
 		return (1);
 	}
 }
 
 /*
  * Active open failed.
  */
 static int
 do_act_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct toedev *tod = &td->tod;
 	struct cpl_act_open_rpl *rpl = mtod(m, void *);
 	unsigned int atid = G_TID(ntohl(rpl->atid));
 	struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
 	struct inpcb *inp = toep->tp_inp;
 	int s = rpl->status, rc;
 
 	CTR3(KTR_CXGB, "%s: atid %u, status %u ", __func__, atid, s);
 
 	free_atid(&td->tid_maps, atid);
 	toep->tp_tid = -1;
 
 	if (act_open_has_tid(s))
 		queue_tid_release(tod, GET_TID(rpl));
 
 	rc = act_open_rpl_status_to_errno(s);
 	if (rc != EAGAIN)
 		INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	toe_connect_failed(tod, inp, rc);
 	toepcb_release(toep);	/* unlocks inp */
 	if (rc != EAGAIN)
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Send an active open request.
  *
  * State of affairs on entry:
  * soisconnecting (so_state |= SS_ISCONNECTING)
  * tcbinfo not locked (this has changed - used to be WLOCKed)
  * inp WLOCKed
  * tp->t_state = TCPS_SYN_SENT
  * rtalloc1, RT_UNLOCK on rt.
  *
  * Returns 0 once the request has been handed to l2t_send() successfully.
  * Every failure (toepcb, atid, mbuf or L2T entry unavailable, or l2t_send()
  * returning non-zero) is reported as ENOMEM after the partially acquired
  * resources have been undone.
  */
 int
 t3_connect(struct toedev *tod, struct socket *so,
     struct rtentry *rt, struct sockaddr *nam)
 {
 	struct mbuf *m = NULL;
 	struct l2t_entry *e = NULL;
 	struct tom_data *td = t3_tomdata(tod);
 	struct adapter *sc = tod->tod_softc;
 	struct cpl_act_open_req *cpl;
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp = intotcpcb(inp);
 	struct toepcb *toep;
 	int atid = -1, mtu_idx, rscale, cpu_idx, qset;
 	struct sockaddr *gw;
 	struct ifnet *ifp = rt->rt_ifp;
 	struct port_info *pi = ifp->if_softc;	/* XXX wrong for VLAN etc. */
 
 	INP_WLOCK_ASSERT(inp);
 
 	toep = toepcb_alloc(tod);
 	if (toep == NULL)
 		goto failed;
 
 	atid = alloc_atid(&td->tid_maps, toep);
 	if (atid < 0)
 		goto failed;
 
 	/* Pick one of the port's queue sets at random. */
 	qset = pi->first_qset + (arc4random() % pi->nqsets);
 
 	m = M_GETHDR_OFLD(qset, CPL_PRIORITY_CONTROL, cpl);
 	if (m == NULL)
 		goto failed;
 
 	/* Resolve L2 for the gateway if the route has one, else the peer. */
 	gw = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam;
 	e = t3_l2t_get(pi, ifp, gw);
 	if (e == NULL)
 		goto failed;
 
 	toep->tp_l2t = e;
 	toep->tp_tid = atid;	/* used to double check response */
 	toep->tp_qset = qset;
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
 	toep->tp_rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	offload_socket(so, toep);
 
 	/*
 	 * The kernel sets request_r_scale based on sb_max whereas we need to
 	 * take hardware's MAX_RCV_WND into account too.  This is normally a
 	 * no-op as MAX_RCV_WND is much larger than the default sb_max.
 	 */
 	if (tp->t_flags & TF_REQ_SCALE)
 		rscale = tp->request_r_scale = select_rcv_wscale();
 	else
 		rscale = 0;
 	mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0);
 	cpu_idx = sc->rrss_map[qset];
 
 	/* Fill in the CPL_ACT_OPEN_REQ. */
 	cpl->wr.wrh_hi = htobe32(V_WR_OP(FW_WROPCODE_FORWARD));
 	cpl->wr.wrh_lo = 0;
 	OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid)); 
 	inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip,
 	    &cpl->peer_port);
 	cpl->opt0h = calc_opt0h(so, mtu_idx, rscale, e);
 	cpl->opt0l = calc_opt0l(so, toep->tp_rx_credits);
 	cpl->params = 0;
 	cpl->opt2 = calc_opt2(cpu_idx);
 
 	CTR5(KTR_CXGB, "%s: atid %u (%s), toep %p, inp %p", __func__,
 	    toep->tp_tid, tcpstates[tp->t_state], toep, inp);
 
 	if (l2t_send(sc, m, e) == 0)
 		return (0);
 
 	/* The send failed: unhook the socket, then fall into the cleanup. */
 	undo_offload_socket(so);
 
 failed:
 	CTR5(KTR_CXGB, "%s: FAILED, atid %d, toep %p, l2te %p, mbuf %p",
 	    __func__, atid, toep, e, m);
 
 	/* Release only what was actually acquired before the failure. */
 	if (atid >= 0)
 		free_atid(&td->tid_maps, atid);
 
 	if (e)
 		l2t_release(td->l2t, e);
 
 	if (toep)
 		toepcb_free(toep);
 
 	m_freem(m);
 
 	return (ENOMEM);
 }
 
 /*
  * Send an ABORT_REQ message.  Cannot fail.  This routine makes sure we do not
  * send multiple ABORT_REQs for the same connection and also that we do not try
  * to send a message after the connection has closed.
  *
  * The tcbinfo read lock and the inp write lock must be held.  The send
  * sockbuf and the toepcb's queued work requests are discarded before the
  * abort is built.
  */
 static void
 send_reset(struct toepcb *toep)
 {
 
 	struct cpl_abort_req *req;
 	unsigned int tid = toep->tp_tid;
 	struct inpcb *inp = toep->tp_inp;
 	struct socket *so = inp->inp_socket;
 	struct tcpcb *tp = intotcpcb(inp);
 	struct toedev *tod = toep->tp_tod;
 	struct adapter *sc = tod->tod_softc;
 	struct mbuf *m;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep,
 	    toep->tp_flags);
 
 	/* An abort is already in progress; don't send a second one. */
 	if (toep->tp_flags & TP_ABORT_SHUTDOWN)
 		return;
 
 	toep->tp_flags |= (TP_ABORT_RPL_PENDING | TP_ABORT_SHUTDOWN);
 
 	/* Purge the send queue */
 	sbflush(so_sockbuf_snd(so));
 	purge_wr_queue(toep);
 
 	m = M_GETHDR_OFLD(toep->tp_qset, CPL_PRIORITY_DATA, req);
 	if (m == NULL)
 		CXGB_UNIMPLEMENTED();
 
 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
 	req->wr.wrh_lo = htonl(V_WR_TID(tid));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
 	req->rsvd0 = htonl(tp->snd_nxt);
 	/* rsvd1 is 1 when no data WR has been sent on this connection. */
 	req->rsvd1 = !(toep->tp_flags & TP_DATASENT);
 	req->cmd = CPL_ABORT_SEND_RST;
 
 	/* In SYN_SENT the request is parked on out_of_order_queue instead. */
 	if (tp->t_state == TCPS_SYN_SENT)
 		(void )mbufq_enqueue(&toep->out_of_order_queue, m); /* defer */
 	else
 		l2t_send(sc, m, toep->tp_l2t);
 }
 
 /*
  * Abort the offloaded connection behind tp by sending a reset.  The toedev
  * argument is unused.  Always returns 0.
  */
 int
 t3_send_rst(struct toedev *tod __unused, struct tcpcb *tp)
 {
 	struct toepcb *toep = tp->t_toe;
 
 	send_reset(toep);
 	return (0);
 }
 
 /*
  * Handler for RX_DATA CPL messages.
  *
  * Strips the CPL header, updates the TCP receive state (rcv_nxt, rcv_wnd),
  * optionally grows the receive buffer, and appends the payload to the
  * socket's receive buffer.  The mbuf is consumed on every path: it is
  * either appended to the sockbuf or freed.  Always returns 0.
  */
 static int
 do_rx_data(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct cpl_rx_data *hdr = mtod(m, void *);
 	unsigned int tid = GET_TID(hdr);
 	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
 	struct inpcb *inp = toep->tp_inp;
 	struct tcpcb *tp;
 	struct socket *so;
 	struct sockbuf *so_rcv;	
 
 	/* Advance over CPL */
 	m_adj(m, sizeof(*hdr));
 
 	/* XXX: revisit.  This comes from the T4 TOM */
 	if (__predict_false(inp == NULL)) {
 		/*
 		 * do_pass_establish failed and must be attempting to abort the
 		 * connection.  Meanwhile, the T4 has sent us data for such a
 		 * connection.
 		 */
 #ifdef notyet
 		KASSERT(toepcb_flag(toep, TPF_ABORT_SHUTDOWN),
 		    ("%s: inp NULL and tid isn't being aborted", __func__));
 #endif
 		m_freem(m);
 		return (0);
 	}
 
 	INP_WLOCK(inp);
 	/* Data for a dropped or TIME_WAIT connection is discarded. */
 	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
 		CTR4(KTR_CXGB, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
 		    __func__, tid, m->m_pkthdr.len, inp->inp_flags);
 		INP_WUNLOCK(inp);
 		m_freem(m);
 		return (0);
 	}
 
 	/* Track the delayed-ACK mode reported with the message. */
 	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode))
 		toep->tp_delack_mode = hdr->dack_mode;
 
 	tp = intotcpcb(inp);
 
 #ifdef INVARIANTS
 	if (__predict_false(tp->rcv_nxt != be32toh(hdr->seq))) {
 		log(LOG_ERR,
 		    "%s: unexpected seq# %x for TID %u, rcv_nxt %x\n",
 		    __func__, be32toh(hdr->seq), toep->tp_tid, tp->rcv_nxt);
 	}
 #endif
 	/* The payload consumes sequence space and receive window. */
 	tp->rcv_nxt += m->m_pkthdr.len;
 	KASSERT(tp->rcv_wnd >= m->m_pkthdr.len,
 	    ("%s: negative window size", __func__));
 	tp->rcv_wnd -= m->m_pkthdr.len;
 	tp->t_rcvtime = ticks;
 
 	so  = inp->inp_socket;
 	so_rcv = &so->so_rcv;
 	SOCKBUF_LOCK(so_rcv);
 
 	/*
 	 * Data arrived after the receive side was shut down.  Drop the
 	 * connection.  The inp lock is released and retaken so that the
 	 * tcbinfo lock is acquired before it.
 	 */
 	if (__predict_false(so_rcv->sb_state & SBS_CANTRCVMORE)) {
 		CTR3(KTR_CXGB, "%s: tid %u, excess rx (%d bytes)",
 		    __func__, tid, m->m_pkthdr.len);
 		SOCKBUF_UNLOCK(so_rcv);
 		INP_WUNLOCK(inp);
 
 		INP_INFO_RLOCK(&V_tcbinfo);
 		INP_WLOCK(inp);
 		tp = tcp_drop(tp, ECONNRESET);
 		/* The inp is unlocked here only if tcp_drop() returned a tp. */
 		if (tp)
 			INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		m_freem(m);
 		return (0);
 	}
 
 	/* receive buffer autosize */
 	if (so_rcv->sb_flags & SB_AUTOSIZE &&
 	    V_tcp_do_autorcvbuf &&
 	    so_rcv->sb_hiwat < V_tcp_autorcvbuf_max &&
 	    (m->m_pkthdr.len > (sbspace(so_rcv) / 8 * 7) || tp->rcv_wnd < 32768)) {
 		unsigned int hiwat = so_rcv->sb_hiwat;
 		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
 		    V_tcp_autorcvbuf_max);
 
 		/* Stop autosizing on failure; else credit the added space. */
 		if (!sbreserve_locked(so_rcv, newsize, so, NULL))
 			so_rcv->sb_flags &= ~SB_AUTOSIZE;
 		else
 			toep->tp_rx_credits += newsize - hiwat;
 	}
 
 	/* tp_enqueued is compared against sbused() later, in t3_rcvd(). */
 	toep->tp_enqueued += m->m_pkthdr.len;
 	sbappendstream_locked(so_rcv, m, 0);
 	sorwakeup_locked(so);
 	SOCKBUF_UNLOCK_ASSERT(so_rcv);
 
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 /*
  * Handler for PEER_CLOSE CPL messages.
  */
 static int
 do_peer_close(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	const struct cpl_peer_close *hdr = mtod(m, void *);
 	unsigned int tid = GET_TID(hdr);
 	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
 	struct inpcb *inp = toep->tp_inp;
 	struct tcpcb *tp;
 	struct socket *so;
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
 	CTR5(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
 	    tid, tp ? tcpstates[tp->t_state] : "no tp" , toep->tp_flags, inp);
 
 	if (toep->tp_flags & TP_ABORT_RPL_PENDING)
 		goto done;
 
 	so = inp_inpcbtosocket(inp);
 
 	socantrcvmore(so);
 	tp->rcv_nxt++;
 
 	switch (tp->t_state) {
 	case TCPS_SYN_RECEIVED:
 		tp->t_starttime = ticks;
 		/* FALLTHROUGH */ 
 	case TCPS_ESTABLISHED:
 		tp->t_state = TCPS_CLOSE_WAIT;
 		break;
 	case TCPS_FIN_WAIT_1:
 		tp->t_state = TCPS_CLOSING;
 		break;
 	case TCPS_FIN_WAIT_2:
 		tcp_twstart(tp);
 		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		INP_WLOCK(inp);
 		toepcb_release(toep);	/* no more CPLs expected */
 
 		m_freem(m);
 		return (0);
 	default:
 		log(LOG_ERR, "%s: TID %u received PEER_CLOSE in bad state %d\n",
 		    __func__, toep->tp_tid, tp->t_state);
 	}
 
 done:
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Handler for CLOSE_CON_RPL CPL messages.  peer ACK to our FIN received.
  */
 static int
 do_close_con_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	const struct cpl_close_con_rpl *rpl = mtod(m, void *);
 	unsigned int tid = GET_TID(rpl);
 	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
 	struct inpcb *inp = toep->tp_inp;
 	struct tcpcb *tp;
 	struct socket *so;
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
 	CTR4(KTR_CXGB, "%s: tid %u (%s), toep_flags 0x%x", __func__, tid,
 	    tp ? tcpstates[tp->t_state] : "no tp", toep->tp_flags);
 
 	if ((toep->tp_flags & TP_ABORT_RPL_PENDING))
 		goto done;
 
 	so = inp_inpcbtosocket(inp);
 	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */
 
 	switch (tp->t_state) {
 	case TCPS_CLOSING:
 		tcp_twstart(tp);
 release:
 		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		INP_WLOCK(inp);
 		toepcb_release(toep);	/* no more CPLs expected */
 	
 		m_freem(m);
 		return (0);
 	case TCPS_LAST_ACK:
 		if (tcp_close(tp))
 			INP_WUNLOCK(inp);
 		goto release;
 
 	case TCPS_FIN_WAIT_1:
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			soisdisconnected(so);
 		tp->t_state = TCPS_FIN_WAIT_2;
 		break;
 	default:
 		log(LOG_ERR,
 		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
 		    __func__, toep->tp_tid, tp->t_state);
 	}
 
 done:
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	m_freem(m);
 	return (0);
 }
 
 static int
 do_smt_write_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct cpl_smt_write_rpl *rpl = mtod(m, void *);
 
 	if (rpl->status != CPL_ERR_NONE) {
 		log(LOG_ERR,
 		    "Unexpected SMT_WRITE_RPL status %u for entry %u\n",
 		    rpl->status, GET_TID(rpl));
 	}
 
 	m_freem(m);
 	return (0);
 }
 
 static int
 do_set_tcb_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct cpl_set_tcb_rpl *rpl = mtod(m, void *);
 
 	if (rpl->status != CPL_ERR_NONE) {
 		log(LOG_ERR, "Unexpected SET_TCB_RPL status %u for tid %u\n",
 		    rpl->status, GET_TID(rpl));
 	}
 
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Handle an ABORT_RPL_RSS CPL message.
  */
 static int
 do_abort_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	const struct cpl_abort_rpl_rss *rpl = mtod(m, void *);
 	unsigned int tid = GET_TID(rpl);
 	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
 	struct inpcb *inp;
 
 	/*
 	 * Ignore replies to post-close aborts indicating that the abort was
 	 * requested too late.  These connections are terminated when we get
 	 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss
 	 * arrives the TID is either no longer used or it has been recycled.
 	 */
 	if (rpl->status == CPL_ERR_ABORT_FAILED) {
 		m_freem(m);
 		return (0);
 	}
 
 	if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY)
 		return (do_abort_rpl_synqe(qs, r, m));
 
 	CTR4(KTR_CXGB, "%s: tid %d, toep %p, status %d", __func__, tid, toep,
 	    rpl->status);
 
 	inp = toep->tp_inp;
 	INP_WLOCK(inp);
 
 	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
 		if (!(toep->tp_flags & TP_ABORT_RPL_RCVD)) {
 			toep->tp_flags |= TP_ABORT_RPL_RCVD;
 			INP_WUNLOCK(inp);
 		} else {
 			toep->tp_flags &= ~TP_ABORT_RPL_RCVD;
 			toep->tp_flags &= TP_ABORT_RPL_PENDING;
 			toepcb_release(toep);	/* no more CPLs expected */
 		}
 	}
 
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Convert the status code of an ABORT_REQ into a FreeBSD error code.
  */
 static int
 abort_status_to_errno(struct tcpcb *tp, int abort_reason)
 {
 	switch (abort_reason) {
 	case CPL_ERR_BAD_SYN:
 	case CPL_ERR_CONN_RESET:
 		return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET);
 	case CPL_ERR_XMIT_TIMEDOUT:
 	case CPL_ERR_PERSIST_TIMEDOUT:
 	case CPL_ERR_FINWAIT2_TIMEDOUT:
 	case CPL_ERR_KEEPALIVE_TIMEDOUT:
 		return (ETIMEDOUT);
 	default:
 		return (EIO);
 	}
 }
 
 /*
  * Returns whether an ABORT_REQ_RSS message is a negative advice.
  */
 static inline int
 is_neg_adv_abort(unsigned int status)
 {
 	return status == CPL_ERR_RTX_NEG_ADVICE ||
 	    status == CPL_ERR_PERSIST_NEG_ADVICE;
 }
 
 void
 send_abort_rpl(struct toedev *tod, int tid, int qset)
 {
 	struct mbuf *reply;
 	struct cpl_abort_rpl *rpl;
 	struct adapter *sc = tod->tod_softc;
 
 	reply = M_GETHDR_OFLD(qset, CPL_PRIORITY_DATA, rpl);
 	if (!reply)
 		CXGB_UNIMPLEMENTED();
 
 	rpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL));
 	rpl->wr.wrh_lo = htonl(V_WR_TID(tid));
 	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
 	rpl->cmd = CPL_ABORT_NO_RST;
 
 	t3_offload_tx(sc, reply);
 }
 
 /*
  * Handle an ABORT_REQ_RSS CPL message.  If we're waiting for an ABORT_RPL we
  * ignore this request except that we need to reply to it.
  */
 static int
 do_abort_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct toedev *tod = &td->tod;
 	const struct cpl_abort_req_rss *req = mtod(m, void *);
 	unsigned int tid = GET_TID(req);
 	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct socket *so;
 	int qset = toep->tp_qset;
 
 	if (is_neg_adv_abort(req->status)) {
 		CTR4(KTR_CXGB, "%s: negative advice %d for tid %u (%x)",
 		    __func__, req->status, tid, toep->tp_flags);
 		m_freem(m);
 		return (0);
 	}
 
 	if (toep->tp_flags & TP_IS_A_SYNQ_ENTRY)
 		return (do_abort_req_synqe(qs, r, m));
 
 	inp = toep->tp_inp;
 	INP_INFO_RLOCK(&V_tcbinfo);	/* for tcp_close */
 	INP_WLOCK(inp);
 
 	tp = intotcpcb(inp);
 	so = inp->inp_socket;
 
 	CTR6(KTR_CXGB, "%s: tid %u (%s), toep %p (%x), status %d",
 	    __func__, tid, tcpstates[tp->t_state], toep, toep->tp_flags,
 	    req->status);
 
 	if (!(toep->tp_flags & TP_ABORT_REQ_RCVD)) {
 		toep->tp_flags |= TP_ABORT_REQ_RCVD;
 		toep->tp_flags |= TP_ABORT_SHUTDOWN;
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		m_freem(m);
 		return (0);
 	}
 	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
 
 	/*
 	 * If we'd sent a reset on this toep, we'll ignore this and clean up in
 	 * the T3's reply to our reset instead.
 	 */
 	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
 		toep->tp_flags |= TP_ABORT_RPL_SENT;
 		INP_WUNLOCK(inp);
 	} else {
 		so_error_set(so, abort_status_to_errno(tp, req->status));
 		tp = tcp_close(tp);
 		if (tp == NULL)
 			INP_WLOCK(inp);	/* re-acquire */
 		toepcb_release(toep);	/* no more CPLs expected */
 	}
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	send_abort_rpl(tod, tid, qset);
 	m_freem(m);
 	return (0);
 }
 
 static void
 assign_rxopt(struct tcpcb *tp, uint16_t tcpopt)
 {
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = toep->tp_tod->tod_softc;
 
-	tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40;
+	tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(tcpopt)] - 40;
 
 	if (G_TCPOPT_TSTAMP(tcpopt)) {
 		tp->t_flags |= TF_RCVD_TSTMP;
 		tp->t_flags |= TF_REQ_TSTMP;	/* forcibly set */
 		tp->ts_recent = 0;		/* XXX */
 		tp->ts_recent_age = tcp_ts_getticks();
-		tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
 	}
 
 	if (G_TCPOPT_SACK(tcpopt))
 		tp->t_flags |= TF_SACK_PERMIT;
 	else
 		tp->t_flags &= ~TF_SACK_PERMIT;
 
 	if (G_TCPOPT_WSCALE_OK(tcpopt))
 		tp->t_flags |= TF_RCVD_SCALE;
 
 	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
 	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
 		tp->rcv_scale = tp->request_r_scale;
 		tp->snd_scale = G_TCPOPT_SND_WSCALE(tcpopt);
 	}
 
 }
 
 /*
  * The ISS and IRS are from after the exchange of SYNs and are off by 1.
  */
 void
 make_established(struct socket *so, uint32_t cpl_iss, uint32_t cpl_irs,
     uint16_t cpl_tcpopt)
 {
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp = intotcpcb(inp);
 	struct toepcb *toep = tp->t_toe;
 	long bufsize;
 	uint32_t iss = be32toh(cpl_iss) - 1;	/* true ISS */
 	uint32_t irs = be32toh(cpl_irs) - 1;	/* true IRS */
 	uint16_t tcpopt = be16toh(cpl_tcpopt);
 
 	INP_WLOCK_ASSERT(inp);
 
 	tp->t_state = TCPS_ESTABLISHED;
 	tp->t_starttime = ticks;
 	TCPSTAT_INC(tcps_connects);
 
 	CTR4(KTR_CXGB, "%s tid %u, toep %p, inp %p", tcpstates[tp->t_state],
 	    toep->tp_tid, toep, inp);
 
 	tp->irs = irs;
 	tcp_rcvseqinit(tp);
 	tp->rcv_wnd = toep->tp_rx_credits << 10;
 	tp->rcv_adv += tp->rcv_wnd;
 	tp->last_ack_sent = tp->rcv_nxt;
 
 	/*
 	 * If we were unable to send all rx credits via opt0, save the remainder
 	 * in rx_credits so that they can be handed over with the next credit
 	 * update.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	bufsize = select_rcv_wnd(so);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	toep->tp_rx_credits = bufsize - tp->rcv_wnd;
 
 	tp->iss = iss;
 	tcp_sendseqinit(tp);
 	tp->snd_una = iss + 1;
 	tp->snd_nxt = iss + 1;
 	tp->snd_max = iss + 1;
 
 	assign_rxopt(tp, tcpopt);
 	soisconnected(so);
 }
 
 /*
  * Fill in the right TID for CPL messages waiting in the out-of-order queue
  * and send them to the TOE.
  */
 static void
 fixup_and_send_ofo(struct toepcb *toep)
 {
 	struct mbuf *m;
 	struct toedev *tod = toep->tp_tod;
 	struct adapter *sc = tod->tod_softc;
 	struct inpcb *inp = toep->tp_inp;
 	unsigned int tid = toep->tp_tid;
 
 	inp_lock_assert(inp);
 
 	while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) {
 		struct ofld_hdr *oh = mtod(m, void *);
 		/*
 		 * A variety of messages can be waiting but the fields we'll
 		 * be touching are common to all so any message type will do.
 		 */
 		struct cpl_close_con_req *p = (void *)(oh + 1);
 
 		p->wr.wrh_lo = htonl(V_WR_TID(tid));
 		OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid));
 		t3_offload_tx(sc, m);
 	}
 }
 
 /*
  * Process a CPL_ACT_ESTABLISH message.
  */
 static int
 do_act_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct cpl_act_establish *req = mtod(m, void *);
 	unsigned int tid = GET_TID(req);
 	unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
 	struct toepcb *toep = lookup_atid(&td->tid_maps, atid);
 	struct inpcb *inp = toep->tp_inp;
 	struct tcpcb *tp;
 	struct socket *so; 
 
 	CTR3(KTR_CXGB, "%s: atid %u, tid %u", __func__, atid, tid);
 
 	free_atid(&td->tid_maps, atid);
 
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
 	KASSERT(toep->tp_qset == qs->idx,
 	    ("%s qset mismatch %d %d", __func__, toep->tp_qset, qs->idx));
 	KASSERT(toep->tp_tid == atid,
 	    ("%s atid mismatch %d %d", __func__, toep->tp_tid, atid));
 
 	toep->tp_tid = tid;
 	insert_tid(td, toep, tid);
 
 	if (inp->inp_flags & INP_DROPPED) {
 		/* socket closed by the kernel before hw told us it connected */
 		send_reset(toep);
 		goto done;
 	}
 
 	KASSERT(tp->t_state == TCPS_SYN_SENT,
 	    ("TID %u expected TCPS_SYN_SENT, found %d.", tid, tp->t_state));
 
 	so = inp->inp_socket;
 	make_established(so, req->snd_isn, req->rcv_isn, req->tcp_opt);
 
 	/*
 	 * Now that we finally have a TID send any CPL messages that we had to
 	 * defer for lack of a TID.
 	 */
 	if (mbufq_len(&toep->out_of_order_queue))
 		fixup_and_send_ofo(toep);
 
 done:
 	INP_WUNLOCK(inp);
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Process an acknowledgment of WR completion.  Advance snd_una and send the
  * next batch of work requests from the write queue.
  */
 static void
 wr_ack(struct toepcb *toep, struct mbuf *m)
 {
 	struct inpcb *inp = toep->tp_inp;
 	struct tcpcb *tp;
 	struct cpl_wr_ack *hdr = mtod(m, void *);
 	struct socket *so;
 	unsigned int credits = ntohs(hdr->credits);
 	u32 snd_una = ntohl(hdr->snd_una);
 	int bytes = 0;
 	struct sockbuf *snd;
 	struct mbuf *p;
 	struct ofld_hdr *oh;
 
 	inp_wlock(inp);
 	tp = intotcpcb(inp);
 	so = inp->inp_socket;
 	toep->tp_wr_avail += credits;
 	if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
 		toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail;
 
 	while (credits) {
 		p = peek_wr(toep);
 
 		if (__predict_false(!p)) {
 			CTR5(KTR_CXGB, "%s: %u extra WR_ACK credits, "
 			    "tid %u, state %u, wr_avail %u", __func__, credits,
 			    toep->tp_tid, tp->t_state, toep->tp_wr_avail);
 
 			log(LOG_ERR, "%u WR_ACK credits for TID %u with "
 			    "nothing pending, state %u wr_avail=%u\n",
 			    credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail);
 			break;
 		}
 
 		oh = mtod(p, struct ofld_hdr *);
 
 		KASSERT(credits >= G_HDR_NDESC(oh->flags),
 		    ("%s: partial credits?  %d %d", __func__, credits,
 		    G_HDR_NDESC(oh->flags)));
 
 		dequeue_wr(toep);
 		credits -= G_HDR_NDESC(oh->flags);
 		bytes += oh->plen;
 
 		if (oh->flags & F_HDR_SGL)
 			sglist_free(oh->sgl);
 		m_freem(p);
 	}
 
 	if (__predict_false(SEQ_LT(snd_una, tp->snd_una)))
 		goto out_free;
 
 	if (tp->snd_una != snd_una) {
 		tp->snd_una = snd_una;
 		tp->ts_recent_age = tcp_ts_getticks();
 		if (tp->snd_una == tp->snd_nxt)
 			toep->tp_flags &= ~TP_TX_WAIT_IDLE;
 	}
 
 	snd = so_sockbuf_snd(so);
 	if (bytes) {
 		SOCKBUF_LOCK(snd);
 		sbdrop_locked(snd, bytes);
 		so_sowwakeup_locked(so);
 	}
 
 	if (snd->sb_sndptroff < sbused(snd))
 		t3_push_frames(so, 0);
 
 out_free:
 	inp_wunlock(tp->t_inpcb);
 	m_freem(m);
 }
 
 /*
  * Handler for TX_DATA_ACK CPL messages.
  */
 static int
 do_wr_ack(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct cpl_wr_ack *hdr = mtod(m, void *);
 	unsigned int tid = GET_TID(hdr);
 	struct toepcb *toep = lookup_tid(&td->tid_maps, tid);
 
 	/* XXX bad race */
 	if (toep)
 		wr_ack(toep, m);
 
 	return (0);
 }
 
 void
 t3_init_cpl_io(struct adapter *sc)
 {
 	t3_register_cpl_handler(sc, CPL_ACT_ESTABLISH, do_act_establish);
 	t3_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl);
 	t3_register_cpl_handler(sc, CPL_RX_URG_NOTIFY, do_rx_urg_notify);
 	t3_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data);
 	t3_register_cpl_handler(sc, CPL_TX_DMA_ACK, do_wr_ack);
 	t3_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close);
 	t3_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req);
 	t3_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl);
 	t3_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl);
 	t3_register_cpl_handler(sc, CPL_SMT_WRITE_RPL, do_smt_write_rpl);
 	t3_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);
 }
 #endif
Index: projects/release-pkg/sys/dev/cxgb/ulp/tom/cxgb_listen.c
===================================================================
--- projects/release-pkg/sys/dev/cxgb/ulp/tom/cxgb_listen.c	(revision 293335)
+++ projects/release-pkg/sys/dev/cxgb/ulp/tom/cxgb_listen.c	(revision 293336)
@@ -1,1141 +1,1136 @@
 /*-
  * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 
 #ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/refcount.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/ip.h>
+#include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/toecore.h>
 
 #include "cxgb_include.h"
 #include "ulp/tom/cxgb_tom.h"
 #include "ulp/tom/cxgb_l2t.h"
 #include "ulp/tom/cxgb_toepcb.h"
 
 static void t3_send_reset_synqe(struct toedev *, struct synq_entry *);
 
 static int
 alloc_stid(struct tid_info *t, void *ctx)
 {
 	int stid = -1;
 
 	mtx_lock(&t->stid_lock);
 	if (t->sfree) {
 		union listen_entry *p = t->sfree;
 
 		stid = (p - t->stid_tab) + t->stid_base;
 		t->sfree = p->next;
 		p->ctx = ctx;
 		t->stids_in_use++;
 	}
 	mtx_unlock(&t->stid_lock);
 	return (stid);
 }
 
 static void
 free_stid(struct tid_info *t, int stid)
 {
 	union listen_entry *p = stid2entry(t, stid);
 
 	mtx_lock(&t->stid_lock);
 	p->next = t->sfree;
 	t->sfree = p;
 	t->stids_in_use--;
 	mtx_unlock(&t->stid_lock);
 }
 
 static struct listen_ctx *
 alloc_lctx(struct tom_data *td, struct inpcb *inp, int qset)
 {
 	struct listen_ctx *lctx;
 
 	INP_WLOCK_ASSERT(inp);
 
 	lctx = malloc(sizeof(struct listen_ctx), M_CXGB, M_NOWAIT | M_ZERO);
 	if (lctx == NULL)
 		return (NULL);
 
 	lctx->stid = alloc_stid(&td->tid_maps, lctx);
 	if (lctx->stid < 0) {
 		free(lctx, M_CXGB);
 		return (NULL);
 	}
 
 	lctx->inp = inp;
 	in_pcbref(inp);
 
 	lctx->qset = qset;
 	refcount_init(&lctx->refcnt, 1);
 	TAILQ_INIT(&lctx->synq);
 
 	return (lctx);
 }
 
 /* Don't call this directly, use release_lctx instead */
 static int
 free_lctx(struct tom_data *td, struct listen_ctx *lctx)
 {
 	struct inpcb *inp = lctx->inp;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(lctx->refcnt == 0,
 	    ("%s: refcnt %d", __func__, lctx->refcnt));
 	KASSERT(TAILQ_EMPTY(&lctx->synq),
 	    ("%s: synq not empty.", __func__));
 	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
 
 	CTR4(KTR_CXGB, "%s: stid %u, lctx %p, inp %p",
 	    __func__, lctx->stid, lctx, lctx->inp);
 
 	free_stid(&td->tid_maps, lctx->stid);
 	free(lctx, M_CXGB);
 
 	return in_pcbrele_wlocked(inp);
 }
 
 static void
 hold_lctx(struct listen_ctx *lctx)
 {
 
 	refcount_acquire(&lctx->refcnt);
 }
 
 static inline uint32_t
 listen_hashfn(void *key, u_long mask)
 {
 
 	return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
 }
 
 /*
  * Add a listen_ctx entry to the listen hash table.
  */
 static void
 listen_hash_add(struct tom_data *td, struct listen_ctx *lctx)
 {
 	int bucket = listen_hashfn(lctx->inp, td->listen_mask);
 
 	mtx_lock(&td->lctx_hash_lock);
 	LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
 	td->lctx_count++;
 	mtx_unlock(&td->lctx_hash_lock);
 }
 
 /*
  * Look for the listening socket's context entry in the hash and return it.
  */
 static struct listen_ctx *
 listen_hash_find(struct tom_data *td, struct inpcb *inp)
 {
 	int bucket = listen_hashfn(inp, td->listen_mask);
 	struct listen_ctx *lctx;
 
 	mtx_lock(&td->lctx_hash_lock);
 	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
 		if (lctx->inp == inp)
 			break;
 	}
 	mtx_unlock(&td->lctx_hash_lock);
 
 	return (lctx);
 }
 
 /*
  * Removes the listen_ctx structure for inp from the hash and returns it.
  */
 static struct listen_ctx *
 listen_hash_del(struct tom_data *td, struct inpcb *inp)
 {
 	int bucket = listen_hashfn(inp, td->listen_mask);
 	struct listen_ctx *lctx, *l;
 
 	mtx_lock(&td->lctx_hash_lock);
 	LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
 		if (lctx->inp == inp) {
 			LIST_REMOVE(lctx, link);
 			td->lctx_count--;
 			break;
 		}
 	}
 	mtx_unlock(&td->lctx_hash_lock);
 
 	return (lctx);
 }
 
 /*
  * Releases a hold on the lctx.  Must be called with the listening socket's inp
  * locked.  The inp may be freed by this function and it returns NULL to
  * indicate this.
  */
 static struct inpcb *
 release_lctx(struct tom_data *td, struct listen_ctx *lctx)
 {
 	struct inpcb *inp = lctx->inp;
 	int inp_freed = 0;
 
 	INP_WLOCK_ASSERT(inp);
 	if (refcount_release(&lctx->refcnt))
 		inp_freed = free_lctx(td, lctx);
 
 	return (inp_freed ? NULL : inp);
 }
 
 static int
 create_server(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct mbuf *m;
 	struct cpl_pass_open_req *req;
 	struct inpcb *inp = lctx->inp;
 
 	m = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, req);
 	if (m == NULL)
 		return (ENOMEM);
 
 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
 	req->local_port = inp->inp_lport; 
 	memcpy(&req->local_ip, &inp->inp_laddr, 4);
 	req->peer_port = 0;
 	req->peer_ip = 0;
 	req->peer_netmask = 0;
 	req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
 	req->opt0l = htonl(V_RCV_BUFSIZ(16));
 	req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
 
 	t3_offload_tx(sc, m);
 
 	return (0);
 }
 
 static int
 destroy_server(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct mbuf *m;
 	struct cpl_close_listserv_req *req;
 
 	m = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, req);
 	if (m == NULL)
 		return (ENOMEM);
 
 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
 	    lctx->stid));
 	req->cpu_idx = 0;
 
 	t3_offload_tx(sc, m);
 
 	return (0);
 }
 
 /*
  * Process a CPL_CLOSE_LISTSRV_RPL message.  If the status is good we release
  * the STID.
  */
 static int
 do_close_server_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct cpl_close_listserv_rpl *rpl = mtod(m, void *);
 	unsigned int stid = GET_TID(rpl);
 	struct listen_ctx *lctx = lookup_stid(&td->tid_maps, stid);
 	struct inpcb *inp = lctx->inp;
 
 	CTR3(KTR_CXGB, "%s: stid %u, status %u", __func__, stid, rpl->status);
 
 	if (rpl->status != CPL_ERR_NONE) {
 		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u",
 		    __func__, rpl->status, stid);
 	} else {
 		INP_WLOCK(inp);
 		KASSERT(listen_hash_del(td, lctx->inp) == NULL,
 		    ("%s: inp %p still in listen hash", __func__, inp));
 		if (release_lctx(td, lctx) != NULL)
 			INP_WUNLOCK(inp);
 	}
 
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Process a CPL_PASS_OPEN_RPL message.  Remove the lctx from the listen hash
  * table and free it if there was any error, otherwise nothing to do.
  */
 static int
 do_pass_open_rpl(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
        	struct cpl_pass_open_rpl *rpl = mtod(m, void *);
 	int stid = GET_TID(rpl);
 	struct listen_ctx *lctx;
 	struct inpcb *inp;
 
 	/*
 	 * We get these replies also when setting up HW filters.  Just throw
 	 * those away.
 	 */
 	if (stid >= td->tid_maps.stid_base + td->tid_maps.nstids)
 		goto done;
 
 	lctx = lookup_stid(&td->tid_maps, stid);
 	inp = lctx->inp;
 
 	INP_WLOCK(inp);
 
 	CTR4(KTR_CXGB, "%s: stid %u, status %u, flags 0x%x",
 	    __func__, stid, rpl->status, lctx->flags);
 
 	lctx->flags &= ~LCTX_RPL_PENDING;
 
 	if (rpl->status != CPL_ERR_NONE) {
 		log(LOG_ERR, "%s: %s: hw listen (stid %d) failed: %d\n",
 		    __func__, device_get_nameunit(sc->dev), stid, rpl->status);
 	}
 
 #ifdef INVARIANTS
 	/*
 	 * If the inp has been dropped (listening socket closed) then
 	 * listen_stop must have run and taken the inp out of the hash.
 	 */
 	if (inp->inp_flags & INP_DROPPED) {
 		KASSERT(listen_hash_del(td, inp) == NULL,
 		    ("%s: inp %p still in listen hash", __func__, inp));
 	}
 #endif
 
 	if (inp->inp_flags & INP_DROPPED && rpl->status != CPL_ERR_NONE) {
 		if (release_lctx(td, lctx) != NULL)
 			INP_WUNLOCK(inp);
 		goto done;
 	}
 
 	/*
 	 * Listening socket stopped listening earlier and now the chip tells us
 	 * it has started the hardware listener.  Stop it; the lctx will be
 	 * released in do_close_server_rpl.
 	 */
 	if (inp->inp_flags & INP_DROPPED) {
 		destroy_server(sc, lctx);
 		INP_WUNLOCK(inp);
 		goto done;
 	}
 
 	/*
 	 * Failed to start hardware listener.  Take inp out of the hash and
 	 * release our reference on it.  An error message has been logged
 	 * already.
 	 */
 	if (rpl->status != CPL_ERR_NONE) {
 		listen_hash_del(td, inp);
 		if (release_lctx(td, lctx) != NULL)
 			INP_WUNLOCK(inp);
 		goto done;
 	}
 
 	/* hardware listener open for business */
 
 	INP_WUNLOCK(inp);
 done:
 	m_freem(m);
 	return (0);
 }
 
 static void
 pass_accept_req_to_protohdrs(const struct cpl_pass_accept_req *cpl,
     struct in_conninfo *inc, struct tcphdr *th, struct tcpopt *to)
 {
 	const struct tcp_options *t3opt = &cpl->tcp_options;
 
 	bzero(inc, sizeof(*inc));
 	inc->inc_faddr.s_addr = cpl->peer_ip;
 	inc->inc_laddr.s_addr = cpl->local_ip;
 	inc->inc_fport = cpl->peer_port;
 	inc->inc_lport = cpl->local_port;
 
 	bzero(th, sizeof(*th));
 	th->th_sport = cpl->peer_port;
 	th->th_dport = cpl->local_port;
 	th->th_seq = be32toh(cpl->rcv_isn); /* as in tcp_fields_to_host */
 	th->th_flags = TH_SYN;
 
 	bzero(to, sizeof(*to));
 	if (t3opt->mss) {
 		to->to_flags |= TOF_MSS;
 		to->to_mss = be16toh(t3opt->mss);
 	}
 	if (t3opt->wsf) {
 		to->to_flags |= TOF_SCALE;
 		to->to_wscale = t3opt->wsf;
 	}
 	if (t3opt->tstamp)
 		to->to_flags |= TOF_TS;
 	if (t3opt->sack)
 		to->to_flags |= TOF_SACKPERM;
 }
 
 static inline void
 hold_synqe(struct synq_entry *synqe)
 {
 
 	refcount_acquire(&synqe->refcnt);
 }
 
 static inline void
 release_synqe(struct synq_entry *synqe)
 {
 
 	if (refcount_release(&synqe->refcnt))
 		m_freem(synqe->m);
 }
 
 /*
  * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to
  * store some state temporarily.  There will be enough room in the mbuf's
  * trailing space as the CPL is not that large.
  *
  * XXX: bad hack.
  */
 static struct synq_entry *
 mbuf_to_synq_entry(struct mbuf *m)
 {
 	int len = roundup(sizeof (struct synq_entry), 8);
 
 	if (__predict_false(M_TRAILINGSPACE(m) < len)) {
 	    panic("%s: no room for synq_entry (%td, %d)\n", __func__,
 	    M_TRAILINGSPACE(m), len);
 	}
 
 	return ((void *)(M_START(m) + M_SIZE(m) - len));
 }
 
 #ifdef KTR
 #define REJECT_PASS_ACCEPT()	do { \
 	reject_reason = __LINE__; \
 	goto reject; \
 } while (0)
 #else
 #define REJECT_PASS_ACCEPT()	do { goto reject; } while (0)
 #endif
 
 /*
  * The context associated with a tid entry via insert_tid could be a synq_entry
  * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
  */
 CTASSERT(offsetof(struct toepcb, tp_flags) == offsetof(struct synq_entry, flags));
 
 /*
  * Handle a CPL_PASS_ACCEPT_REQ message.
  */
 static int
 do_pass_accept_req(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct toedev *tod = &td->tod;
 	const struct cpl_pass_accept_req *req = mtod(m, void *);
 	unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
 	unsigned int tid = GET_TID(req);
 	struct listen_ctx *lctx = lookup_stid(&td->tid_maps, stid);
 	struct l2t_entry *e = NULL;
+	struct nhop4_basic nh4;
 	struct sockaddr_in nam;
-	struct rtentry *rt;
 	struct inpcb *inp;
 	struct socket *so;
 	struct port_info *pi;
 	struct ifnet *ifp;
 	struct in_conninfo inc;
 	struct tcphdr th;
 	struct tcpopt to;
 	struct synq_entry *synqe = NULL;
 	int i;
 #ifdef KTR
 	int reject_reason;
 #endif
 
 	CTR4(KTR_CXGB, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
 	    lctx);
 
 	pass_accept_req_to_protohdrs(req, &inc, &th, &to);
 
 	/*
 	 * Don't offload if the interface that received the SYN doesn't have
 	 * IFCAP_TOE enabled.
 	 */
 	pi = NULL;
 	for_each_port(sc, i) {
 		if (memcmp(sc->port[i].hw_addr, req->dst_mac, ETHER_ADDR_LEN))
 			continue;
 		pi = &sc->port[i];
 		break;
 	}
 	if (pi == NULL)
 		REJECT_PASS_ACCEPT();
 	ifp = pi->ifp;
 	if ((ifp->if_capenable & IFCAP_TOE4) == 0)
 		REJECT_PASS_ACCEPT();
 
 	/*
 	 * Don't offload if the outgoing interface for the route back to the
 	 * peer is not the same as the interface that received the SYN.
 	 */
 	bzero(&nam, sizeof(nam));
 	nam.sin_len = sizeof(nam);
 	nam.sin_family = AF_INET;
 	nam.sin_addr = inc.inc_faddr;
-	rt = rtalloc1((struct sockaddr *)&nam, 0, 0);
-	if (rt == NULL)
+	if (fib4_lookup_nh_basic(RT_DEFAULT_FIB, nam.sin_addr, 0, 0, &nh4) != 0)
 		REJECT_PASS_ACCEPT();
 	else {
-		struct sockaddr *nexthop;
-
-		RT_UNLOCK(rt);
-		nexthop = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway :
-		    (struct sockaddr *)&nam;
-		if (rt->rt_ifp == ifp)
-			e = t3_l2t_get(pi, rt->rt_ifp, nexthop);
-		RTFREE(rt);
+		nam.sin_addr = nh4.nh_addr;
+		if (nh4.nh_ifp == ifp)
+			e = t3_l2t_get(pi, ifp, (struct sockaddr *)&nam);
 		if (e == NULL)
 			REJECT_PASS_ACCEPT();	/* no l2te, or ifp mismatch */
 	}
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 
 	/* Don't offload if the 4-tuple is already in use */
 	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		REJECT_PASS_ACCEPT();
 	}
 
 	inp = lctx->inp;	/* listening socket (not owned by the TOE) */
 	INP_WLOCK(inp);
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 		/*
 		 * The listening socket has closed.  The reply from the TOE to
 		 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
 		 * resources tied to this listen context.
 		 */
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		REJECT_PASS_ACCEPT();
 	}
 	so = inp->inp_socket;
 
 	/* Reuse the mbuf that delivered the CPL to us */
 	synqe = mbuf_to_synq_entry(m);
 	synqe->flags = TP_IS_A_SYNQ_ENTRY;
 	synqe->m = m;
 	synqe->lctx = lctx;
 	synqe->tid = tid;
 	synqe->e = e;
 	synqe->opt0h = calc_opt0h(so, 0, 0, e);
 	synqe->qset = pi->first_qset + (arc4random() % pi->nqsets);
 	SOCKBUF_LOCK(&so->so_rcv);
 	synqe->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	refcount_init(&synqe->refcnt, 1);
 	atomic_store_rel_int(&synqe->reply, RPL_OK);
 
 	insert_tid(td, synqe, tid);
 	TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
 	hold_synqe(synqe);
 	hold_lctx(lctx);
 
 	/* syncache_add releases both pcbinfo and pcb locks */
 	toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
 	INP_UNLOCK_ASSERT(inp);
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 
 	/*
 	 * If we replied during syncache_add (reply is RPL_DONE), good.
 	 * Otherwise (reply is unchanged - RPL_OK) it's no longer ok to reply.
 	 * The mbuf will stick around as long as the entry is in the syncache.
 	 * The kernel is free to retry syncache_respond but we'll ignore it due
 	 * to RPL_DONT.
 	 */
 	if (atomic_cmpset_int(&synqe->reply, RPL_OK, RPL_DONT)) {
 
 		INP_WLOCK(inp);
 		if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 			/* listener closed.  synqe must have been aborted. */
 			KASSERT(synqe->flags & TP_ABORT_SHUTDOWN,
 			    ("%s: listener %p closed but synqe %p not aborted",
 			    __func__, inp, synqe));
 
 			CTR5(KTR_CXGB,
 			    "%s: stid %u, tid %u, lctx %p, synqe %p, ABORTED",
 			    __func__, stid, tid, lctx, synqe);
 			INP_WUNLOCK(inp);
 			release_synqe(synqe);
 			return (__LINE__);
 		}
 
 		KASSERT(!(synqe->flags & TP_ABORT_SHUTDOWN),
 		    ("%s: synqe %p aborted, but listener %p not dropped.",
 		    __func__, synqe, inp));
 
 		TAILQ_REMOVE(&lctx->synq, synqe, link);
 		release_synqe(synqe);	/* removed from synq list */
 		inp = release_lctx(td, lctx);
 		if (inp)
 			INP_WUNLOCK(inp);
 
 		release_synqe(synqe);	/* about to exit function */
 		REJECT_PASS_ACCEPT();
 	}
 
 	KASSERT(synqe->reply == RPL_DONE,
 	    ("%s: reply %d", __func__, synqe->reply));
 
 	CTR3(KTR_CXGB, "%s: stid %u, tid %u, OK", __func__, stid, tid);
 	release_synqe(synqe);
 	return (0);
 
 reject:
 	CTR4(KTR_CXGB, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
 	    reject_reason);
 
 	if (synqe == NULL)
 		m_freem(m);
 	if (e)
 		l2t_release(td->l2t, e);
 	queue_tid_release(tod, tid);
 
 	return (0);
 }
 
 static void
 pass_establish_to_protohdrs(const struct cpl_pass_establish *cpl,
     struct in_conninfo *inc, struct tcphdr *th, struct tcpopt *to)
 {
 	uint16_t tcp_opt = be16toh(cpl->tcp_opt);
 
 	bzero(inc, sizeof(*inc));
 	inc->inc_faddr.s_addr = cpl->peer_ip;
 	inc->inc_laddr.s_addr = cpl->local_ip;
 	inc->inc_fport = cpl->peer_port;
 	inc->inc_lport = cpl->local_port;
 
 	bzero(th, sizeof(*th));
 	th->th_sport = cpl->peer_port;
 	th->th_dport = cpl->local_port;
 	th->th_flags = TH_ACK;
 	th->th_seq = be32toh(cpl->rcv_isn); /* as in tcp_fields_to_host */
 	th->th_ack = be32toh(cpl->snd_isn); /* ditto */
 
 	bzero(to, sizeof(*to));
 	if (G_TCPOPT_TSTAMP(tcp_opt))
 		to->to_flags |= TOF_TS;
 }
 
 /*
  * Process a CPL_PASS_ESTABLISH message.  The T3 has already established a
  * connection and we need to do the software side setup.
  */
 static int
 do_pass_establish(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct cpl_pass_establish *cpl = mtod(m, void *);
 	struct toedev *tod = &td->tod;
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
 	struct toepcb *toep;
 	struct socket *so;
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp, *new_inp;
 	struct tcpopt to;
 	struct tcphdr th;
 	struct in_conninfo inc;
 #ifdef KTR
 	int stid = G_PASS_OPEN_TID(ntohl(cpl->tos_tid));
 #endif
 
 	CTR5(KTR_CXGB, "%s: stid %u, tid %u, lctx %p, inp_flags 0x%x",
 	    __func__, stid, tid, lctx, inp->inp_flags);
 
 	KASSERT(qs->idx == synqe->qset,
 	    ("%s qset mismatch %d %d", __func__, qs->idx, synqe->qset));
 
 	INP_INFO_RLOCK(&V_tcbinfo);	/* for syncache_expand */
 	INP_WLOCK(inp);
 
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 		/*
 		 * The listening socket has closed.  The TOM must have aborted
 		 * all the embryonic connections (including this one) that were
 		 * on the lctx's synq.  do_abort_rpl for the tid is responsible
 		 * for cleaning up.
 		 */
 		KASSERT(synqe->flags & TP_ABORT_SHUTDOWN,
 		    ("%s: listen socket dropped but tid %u not aborted.",
 		    __func__, tid));
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		m_freem(m);
 		return (0);
 	}
 
 	pass_establish_to_protohdrs(cpl, &inc, &th, &to);
 
 	/* Lie in order to pass the checks in syncache_expand */
 	to.to_tsecr = synqe->ts;
 	th.th_ack = synqe->iss + 1;
 
 	toep = toepcb_alloc(tod);
 	if (toep == NULL) {
 reset:
 		t3_send_reset_synqe(tod, synqe);
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		m_freem(m);
 		return (0);
 	}
 	toep->tp_qset = qs->idx;
 	toep->tp_l2t = synqe->e;
 	toep->tp_tid = tid;
 	toep->tp_rx_credits = synqe->rx_credits;
 
 	synqe->toep = toep;
 	synqe->cpl = cpl;
 
 	so = inp->inp_socket;
 	if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
 		toepcb_free(toep);
 		goto reset;
 	}
 
 	/* New connection inpcb is already locked by syncache_expand(). */
 	new_inp = sotoinpcb(so);
 	INP_WLOCK_ASSERT(new_inp);
 
 	if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
 		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
 		t3_offload_socket(tod, synqe, so);
 	}
 
 	INP_WUNLOCK(new_inp);
 
 	/* Remove the synq entry and release its reference on the lctx */
 	TAILQ_REMOVE(&lctx->synq, synqe, link);
 	inp = release_lctx(td, lctx);
 	if (inp)
 		INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	release_synqe(synqe);
 
 	m_freem(m);
 	return (0);
 }
 
 void
 t3_init_listen_cpl_handlers(struct adapter *sc)
 {
 	t3_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
 	t3_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
 	t3_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
 	t3_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);
 }
 
 /*
  * Start a listening server by sending a passive open request to HW.
  *
  * Can't take adapter lock here and access to sc->flags, sc->open_device_map,
  * sc->offload_map, if_capenable are all race prone.
  */
 int
 t3_listen_start(struct toedev *tod, struct tcpcb *tp)
 {
 	struct tom_data *td = t3_tomdata(tod);
 	struct adapter *sc = tod->tod_softc;
 	struct port_info *pi;
 	struct inpcb *inp = tp->t_inpcb;
 	struct listen_ctx *lctx;
 	int i;
 
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_vflag & INP_IPV4) == 0)
 		return (0);
 
 #ifdef notyet
 	ADAPTER_LOCK(sc);
 	if (IS_BUSY(sc)) {
 		log(LOG_ERR, "%s: listen request ignored, %s is busy",
 		    __func__, device_get_nameunit(sc->dev));
 		goto done;
 	}
 
 	KASSERT(sc->flags & TOM_INIT_DONE,
 	    ("%s: TOM not initialized", __func__));
 #endif
 
 	if ((sc->open_device_map & sc->offload_map) == 0)
 		goto done;	/* no port that's UP with IFCAP_TOE enabled */
 
 	/*
 	 * Find a running port with IFCAP_TOE4.  We'll use the first such port's
 	 * queues to send the passive open and receive the reply to it.
 	 *
 	 * XXX: need a way to mark an port in use by offload.  if_cxgbe should
 	 * then reject any attempt to bring down such a port (and maybe reject
 	 * attempts to disable IFCAP_TOE on that port too?).
 	 */
 	for_each_port(sc, i) {
 		if (isset(&sc->open_device_map, i) &&
 		    sc->port[i].ifp->if_capenable & IFCAP_TOE4)
 				break;
 	}
 	KASSERT(i < sc->params.nports,
 	    ("%s: no running port with TOE capability enabled.", __func__));
 	pi = &sc->port[i];
 
 	if (listen_hash_find(td, inp) != NULL)
 		goto done;	/* already setup */
 
 	lctx = alloc_lctx(td, inp, pi->first_qset);
 	if (lctx == NULL) {
 		log(LOG_ERR,
 		    "%s: listen request ignored, %s couldn't allocate lctx\n",
 		    __func__, device_get_nameunit(sc->dev));
 		goto done;
 	}
 	listen_hash_add(td, lctx);
 
 	CTR5(KTR_CXGB, "%s: stid %u (%s), lctx %p, inp %p", __func__,
 	    lctx->stid, tcpstates[tp->t_state], lctx, inp);
 
 	if (create_server(sc, lctx) != 0) {
 		log(LOG_ERR, "%s: %s failed to create hw listener.\n", __func__,
 		    device_get_nameunit(sc->dev));
 		(void) listen_hash_del(td, inp);
 		inp = release_lctx(td, lctx);
 		/* can't be freed, host stack has a reference */
 		KASSERT(inp != NULL, ("%s: inp freed", __func__));
 		goto done;
 	}
 	lctx->flags |= LCTX_RPL_PENDING;
 done:
 #ifdef notyet
 	ADAPTER_UNLOCK(sc);
 #endif
 	return (0);
 }
 
 /*
  * Stop a listening server by sending a close_listsvr request to HW.
  * The server TID is freed when we get the reply.
  */
 int
 t3_listen_stop(struct toedev *tod, struct tcpcb *tp)
 {
 	struct listen_ctx *lctx;
 	struct adapter *sc = tod->tod_softc;
 	struct tom_data *td = t3_tomdata(tod);
 	struct inpcb *inp = tp->t_inpcb;
 	struct synq_entry *synqe;
 
 	INP_WLOCK_ASSERT(inp);
 
 	lctx = listen_hash_del(td, inp);
 	if (lctx == NULL)
 		return (ENOENT);	/* no hardware listener for this inp */
 
 	CTR4(KTR_CXGB, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
 	    lctx, lctx->flags);
 
 	/*
 	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
 	 * arrive and clean up when it does.
 	 */
 	if (lctx->flags & LCTX_RPL_PENDING) {
 		KASSERT(TAILQ_EMPTY(&lctx->synq),
 		    ("%s: synq not empty.", __func__));
 		return (EINPROGRESS);
 	}
 
 	/*
 	 * The host stack will abort all the connections on the listening
 	 * socket's so_comp.  It doesn't know about the connections on the synq
 	 * so we need to take care of those.
 	 */
 	TAILQ_FOREACH(synqe, &lctx->synq, link) {
 		KASSERT(synqe->lctx == lctx, ("%s: synq corrupt", __func__));
 		t3_send_reset_synqe(tod, synqe);
 	}
 
 	destroy_server(sc, lctx);
 	return (0);
 }
 
 void
 t3_syncache_added(struct toedev *tod __unused, void *arg)
 {
 	struct synq_entry *synqe = arg;
 
 	hold_synqe(synqe);
 }
 
 void
 t3_syncache_removed(struct toedev *tod __unused, void *arg)
 {
 	struct synq_entry *synqe = arg;
 
 	release_synqe(synqe);
 }
 
 /* XXX */
 extern void tcp_dooptions(struct tcpopt *, u_char *, int, int);
 
 int
 t3_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct synq_entry *synqe = arg;
 	struct l2t_entry *e = synqe->e;
 	struct ip *ip = mtod(m, struct ip *);
 	struct tcphdr *th = (void *)(ip + 1);
 	struct cpl_pass_accept_rpl *rpl;
 	struct mbuf *r;
 	struct listen_ctx *lctx = synqe->lctx;
 	struct tcpopt to;
 	int mtu_idx, cpu_idx;
 
 	/*
 	 * The first time we run it's during the call to syncache_add.  That's
 	 * the only one we care about.
 	 */
 	if (atomic_cmpset_int(&synqe->reply, RPL_OK, RPL_DONE) == 0)
 		goto done;	/* reply to the CPL only if it's ok to do so */
 
 	r = M_GETHDR_OFLD(lctx->qset, CPL_PRIORITY_CONTROL, rpl);
 	if (r == NULL)
 		goto done;
 
 	/*
 	 * Use only the provided mbuf (with ip and tcp headers) and what's in
 	 * synqe.  Avoid looking at the listening socket (lctx->inp) here.
 	 *
 	 * XXX: if the incoming SYN had the TCP timestamp option but the kernel
 	 * decides it doesn't want to use TCP timestamps we have no way of
 	 * relaying this info to the chip on a per-tid basis (all we have is a
 	 * global knob).
 	 */
 	bzero(&to, sizeof(to));
 	tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
 	    TO_SYN);
 
 	/* stash them for later */
 	synqe->iss = be32toh(th->th_seq);
 	synqe->ts = to.to_tsval;
 
 	mtu_idx = find_best_mtu_idx(sc, NULL, to.to_mss);
 	cpu_idx = sc->rrss_map[synqe->qset];
 
 	rpl->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	rpl->wr.wrh_lo = 0;
 	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, synqe->tid));
 	rpl->opt2 = calc_opt2(cpu_idx);
 	rpl->rsvd = rpl->opt2;		/* workaround for HW bug */
 	rpl->peer_ip = ip->ip_dst.s_addr;
 	rpl->opt0h = synqe->opt0h |
 	    calc_opt0h(NULL, mtu_idx, to.to_wscale, NULL);
 	rpl->opt0l_status = htobe32(CPL_PASS_OPEN_ACCEPT) |
 	    calc_opt0l(NULL, synqe->rx_credits);
 
 	l2t_send(sc, r, e);
 done:
 	m_freem(m);
 	return (0);
 }
 
 int
 do_abort_req_synqe(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct toedev *tod = &td->tod;
 	const struct cpl_abort_req_rss *req = mtod(m, void *);
 	unsigned int tid = GET_TID(req);
 	struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 
 	KASSERT(synqe->flags & TP_IS_A_SYNQ_ENTRY,
 	    ("%s: !SYNQ_ENTRY", __func__));
 
 	CTR6(KTR_CXGB, "%s: tid %u, synqe %p (%x), lctx %p, status %d",
 	    __func__, tid, synqe, synqe->flags, synqe->lctx, req->status);
 
 	INP_WLOCK(inp);
 
 	if (!(synqe->flags & TP_ABORT_REQ_RCVD)) {
 		synqe->flags |= TP_ABORT_REQ_RCVD;
 		synqe->flags |= TP_ABORT_SHUTDOWN;
 		INP_WUNLOCK(inp);
 		m_freem(m);
 		return (0);
 	}
 	synqe->flags &= ~TP_ABORT_REQ_RCVD;
 
 	/*
 	 * If we'd sent a reset on this synqe, we'll ignore this and clean up in
 	 * the T3's reply to our reset instead.
 	 */
 	if (synqe->flags & TP_ABORT_RPL_PENDING) {
 		synqe->flags |= TP_ABORT_RPL_SENT;
 		INP_WUNLOCK(inp);
 	} else {
 		TAILQ_REMOVE(&lctx->synq, synqe, link);
 		inp = release_lctx(td, lctx);
 		if (inp)
 			INP_WUNLOCK(inp);
 		release_tid(tod, tid, qs->idx);
 		l2t_release(td->l2t, synqe->e);
 		release_synqe(synqe);
 	}
 
 	send_abort_rpl(tod, tid, qs->idx);
 	m_freem(m);
 	return (0);
 }
 
 int
 do_abort_rpl_synqe(struct sge_qset *qs, struct rsp_desc *r, struct mbuf *m)
 {
 	struct adapter *sc = qs->adap;
 	struct tom_data *td = sc->tom_softc;
 	struct toedev *tod = &td->tod;
 	const struct cpl_abort_rpl_rss *rpl = mtod(m, void *);
 	unsigned int tid = GET_TID(rpl);
 	struct synq_entry *synqe = lookup_tid(&td->tid_maps, tid);
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 
 	CTR3(KTR_CXGB, "%s: tid %d, synqe %p, status %d", tid, synqe,
 	    rpl->status);
 
 	INP_WLOCK(inp);
 
 	if (synqe->flags & TP_ABORT_RPL_PENDING) {
 		if (!(synqe->flags & TP_ABORT_RPL_RCVD)) {
 			synqe->flags |= TP_ABORT_RPL_RCVD;
 			INP_WUNLOCK(inp);
 		} else {
 			synqe->flags &= ~TP_ABORT_RPL_RCVD;
 			synqe->flags &= TP_ABORT_RPL_PENDING;
 
 			TAILQ_REMOVE(&lctx->synq, synqe, link);
 			inp = release_lctx(td, lctx);
 			if (inp)
 				INP_WUNLOCK(inp);
 			release_tid(tod, tid, qs->idx);
 			l2t_release(td->l2t, synqe->e);
 			release_synqe(synqe);
 		}
 	}
 
 	m_freem(m);
 	return (0);
 }
 
 static void
 t3_send_reset_synqe(struct toedev *tod, struct synq_entry *synqe)
 {
 	struct cpl_abort_req *req;
 	unsigned int tid = synqe->tid;
 	struct adapter *sc = tod->tod_softc;
 	struct mbuf *m;
 #ifdef INVARIANTS
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 
 	CTR4(KTR_CXGB, "%s: tid %d, synqe %p (%x)", __func__, tid, synqe,
 	    synqe->flags);
 
 	if (synqe->flags & TP_ABORT_SHUTDOWN)
 		return;
 
 	synqe->flags |= (TP_ABORT_RPL_PENDING | TP_ABORT_SHUTDOWN);
 
 	m = M_GETHDR_OFLD(synqe->qset, CPL_PRIORITY_DATA, req);
 	if (m == NULL)
 		CXGB_UNIMPLEMENTED();
 
 	req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ));
 	req->wr.wrh_lo = htonl(V_WR_TID(tid));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
 	req->rsvd0 = 0;
 	req->rsvd1 = !(synqe->flags & TP_DATASENT);
 	req->cmd = CPL_ABORT_SEND_RST;
 
 	l2t_send(sc, m, synqe->e);
 }
 
 void
 t3_offload_socket(struct toedev *tod, void *arg, struct socket *so)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct tom_data *td = sc->tom_softc;
 	struct synq_entry *synqe = arg;
 #ifdef INVARIANTS
 	struct inpcb *inp = sotoinpcb(so);
 #endif
 	struct cpl_pass_establish *cpl = synqe->cpl;
 	struct toepcb *toep = synqe->toep;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
 	INP_WLOCK_ASSERT(inp);
 
 	offload_socket(so, toep);
 	make_established(so, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
 	update_tid(td, toep, synqe->tid);
 	synqe->flags |= TP_SYNQE_EXPANDED;
 }
 #endif
Index: projects/release-pkg/sys/dev/cxgbe/iw_cxgbe/cm.c
===================================================================
--- projects/release-pkg/sys/dev/cxgbe/iw_cxgbe/cm.c	(revision 293335)
+++ projects/release-pkg/sys/dev/cxgbe/iw_cxgbe/cm.c	(revision 293336)
@@ -1,2439 +1,2438 @@
 /*
  * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
  * General Public License (GPL) Version 2, available from the file
  * COPYING in the main directory of this source tree, or the
  * OpenIB.org BSD license below:
  *
  *     Redistribution and use in source and binary forms, with or
  *     without modification, are permitted provided that the following
  *     conditions are met:
  *
  *      - Redistributions of source code must retain the above
  *	  copyright notice, this list of conditions and the following
  *	  disclaimer.
  *
  *      - Redistributions in binary form must reproduce the above
  *	  copyright notice, this list of conditions and the following
  *	  disclaimer in the documentation and/or other materials
  *	  provided with the distribution.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 
 #ifdef TCP_OFFLOAD
 #include <sys/types.h>
 #include <sys/malloc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sockio.h>
 #include <sys/taskqueue.h>
 #include <netinet/in.h>
 #include <net/route.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
+#include <netinet/in_fib.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcpip.h>
 
 #include <netinet/toecore.h>
 
 struct sge_iq;
 struct rss_header;
 #include <linux/types.h>
 #include "offload.h"
 #include "tom/t4_tom.h"
 
 #define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))
 
 #include "iw_cxgbe.h"
 #include <linux/module.h>
 #include <linux/workqueue.h>
 #include <linux/notifier.h>
 #include <linux/inetdevice.h>
 #include <linux/if_vlan.h>
 #include <net/netevent.h>
 
 static spinlock_t req_lock;
 static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
 static struct work_struct c4iw_task;
 static struct workqueue_struct *c4iw_taskq;
 static LIST_HEAD(timeout_list);
 static spinlock_t timeout_lock;
 
 static void process_req(struct work_struct *ctx);
 static void start_ep_timer(struct c4iw_ep *ep);
 static void stop_ep_timer(struct c4iw_ep *ep);
 static int set_tcpinfo(struct c4iw_ep *ep);
 static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
 static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
 static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
 static void *alloc_ep(int size, gfp_t flags);
 void __free_ep(struct c4iw_ep_common *epc);
-static struct rtentry * find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
-		__be16 peer_port, u8 tos);
+static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
+		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4);
 static int close_socket(struct c4iw_ep_common *epc, int close);
 static int shutdown_socket(struct c4iw_ep_common *epc);
 static void abort_socket(struct c4iw_ep *ep);
 static void send_mpa_req(struct c4iw_ep *ep);
 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
 static void close_complete_upcall(struct c4iw_ep *ep, int status);
 static int abort_connection(struct c4iw_ep *ep);
 static void peer_close_upcall(struct c4iw_ep *ep);
 static void peer_abort_upcall(struct c4iw_ep *ep);
 static void connect_reply_upcall(struct c4iw_ep *ep, int status);
 static int connect_request_upcall(struct c4iw_ep *ep);
 static void established_upcall(struct c4iw_ep *ep);
 static void process_mpa_reply(struct c4iw_ep *ep);
 static void process_mpa_request(struct c4iw_ep *ep);
 static void process_peer_close(struct c4iw_ep *ep);
 static void process_conn_error(struct c4iw_ep *ep);
 static void process_close_complete(struct c4iw_ep *ep);
 static void ep_timeout(unsigned long arg);
 static void init_sock(struct c4iw_ep_common *epc);
 static void process_data(struct c4iw_ep *ep);
 static void process_connected(struct c4iw_ep *ep);
 static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep);
 static void process_newconn(struct c4iw_ep *parent_ep);
 static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
 static void process_socket_event(struct c4iw_ep *ep);
 static void release_ep_resources(struct c4iw_ep *ep);
 
 #define START_EP_TIMER(ep) \
     do { \
 	    CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
 		__func__, __LINE__, (ep)); \
 	    start_ep_timer(ep); \
     } while (0)
 
 #define STOP_EP_TIMER(ep) \
     do { \
 	    CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
 		__func__, __LINE__, (ep)); \
 	    stop_ep_timer(ep); \
     } while (0)
 
 #ifdef KTR
 static char *states[] = {
 	"idle",
 	"listen",
 	"connecting",
 	"mpa_wait_req",
 	"mpa_req_sent",
 	"mpa_req_rcvd",
 	"mpa_rep_sent",
 	"fpdu_mode",
 	"aborting",
 	"closing",
 	"moribund",
 	"dead",
 	NULL,
 };
 #endif
 
 static void
 process_req(struct work_struct *ctx)
 {
 	struct c4iw_ep_common *epc;
 
 	spin_lock(&req_lock);
 	while (!TAILQ_EMPTY(&req_list)) {
 		epc = TAILQ_FIRST(&req_list);
 		TAILQ_REMOVE(&req_list, epc, entry);
 		epc->entry.tqe_prev = NULL;
 		spin_unlock(&req_lock);
 		if (epc->so)
 			process_socket_event((struct c4iw_ep *)epc);
 		c4iw_put_ep(epc);
 		spin_lock(&req_lock);
 	}
 	spin_unlock(&req_lock);
 }
 
 /*
  * XXX: doesn't belong here in the iWARP driver.
  * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
  *      set.  Is this a valid assumption for active open?
  */
 static int
 set_tcpinfo(struct c4iw_ep *ep)
 {
 	struct socket *so = ep->com.so;
 	struct inpcb *inp = sotoinpcb(so);
 	struct tcpcb *tp;
 	struct toepcb *toep;
 	int rc = 0;
 
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 	if ((tp->t_flags & TF_TOE) == 0) {
 		rc = EINVAL;
 		log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
 		    __func__, so, ep);
 		goto done;
 	}
 	toep = TOEPCB(so);
 
 	ep->hwtid = toep->tid;
 	ep->snd_seq = tp->snd_nxt;
 	ep->rcv_seq = tp->rcv_nxt;
 	ep->emss = max(tp->t_maxseg, 128);
 done:
 	INP_WUNLOCK(inp);
 	return (rc);
 
 }
 
-static struct rtentry *
+static int
 find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
-		__be16 peer_port, u8 tos)
+		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
 {
-	struct route iproute;
-	struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst;
+	struct in_addr addr;
+	int err;
 
 	CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
 	    peer_ip, ntohs(local_port), ntohs(peer_port));
-	bzero(&iproute, sizeof iproute);
-	dst->sin_family = AF_INET;
-	dst->sin_len = sizeof *dst;
-	dst->sin_addr.s_addr = peer_ip;
 
-	rtalloc(&iproute);
-	CTR2(KTR_IW_CXGBE, "%s:frtE %p", __func__, (uint64_t)iproute.ro_rt);
-	return iproute.ro_rt;
+	addr.s_addr = peer_ip;
+	err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4);
+
+	CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err);
+	return err;
 }
 
 static int
 close_socket(struct c4iw_ep_common *epc, int close)
 {
 	struct socket *so = epc->so;
 	int rc;
 
 	CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc, so,
 	    states[epc->state]);
 
 	SOCK_LOCK(so);
 	soupcall_clear(so, SO_RCV);
 	SOCK_UNLOCK(so);
 
 	if (close)
                 rc = soclose(so);
         else
                 rc = soshutdown(so, SHUT_WR | SHUT_RD);
 	epc->so = NULL;
 
 	return (rc);
 }
 
 static int
 shutdown_socket(struct c4iw_ep_common *epc)
 {
 
 	CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc->so, epc,
 	    states[epc->state]);
 
 	return (soshutdown(epc->so, SHUT_WR));
 }
 
 static void
 abort_socket(struct c4iw_ep *ep)
 {
 	struct sockopt sopt;
 	int rc;
 	struct linger l;
 
 	CTR4(KTR_IW_CXGBE, "%s ep %p so %p state %s", __func__, ep, ep->com.so,
 	    states[ep->com.state]);
 
 	l.l_onoff = 1;
 	l.l_linger = 0;
 
 	/* linger_time of 0 forces RST to be sent */
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = SOL_SOCKET;
 	sopt.sopt_name = SO_LINGER;
 	sopt.sopt_val = (caddr_t)&l;
 	sopt.sopt_valsize = sizeof l;
 	sopt.sopt_td = NULL;
 	rc = sosetopt(ep->com.so, &sopt);
 	if (rc) {
 		log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n",
 		    __func__, rc);
 	}
 }
 
 static void
 process_peer_close(struct c4iw_ep *ep)
 {
 	struct c4iw_qp_attributes attrs;
 	int disconnect = 1;
 	int release = 0;
 
 	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
 	    ep->com.so, states[ep->com.state]);
 
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
 
 		case MPA_REQ_WAIT:
 			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
 			    __func__, ep);
 			__state_set(&ep->com, CLOSING);
 			break;
 
 		case MPA_REQ_SENT:
 			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
 			    __func__, ep);
 			__state_set(&ep->com, DEAD);
 			connect_reply_upcall(ep, -ECONNABORTED);
 
 			disconnect = 0;
 			STOP_EP_TIMER(ep);
 			close_socket(&ep->com, 0);
 			ep->com.cm_id->rem_ref(ep->com.cm_id);
 			ep->com.cm_id = NULL;
 			ep->com.qp = NULL;
 			release = 1;
 			break;
 
 		case MPA_REQ_RCVD:
 
 			/*
 			 * We're gonna mark this puppy DEAD, but keep
 			 * the reference on it until the ULP accepts or
 			 * rejects the CR.
 			 */
 			CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
 			    __func__, ep);
 			__state_set(&ep->com, CLOSING);
 			c4iw_get_ep(&ep->com);
 			break;
 
 		case MPA_REP_SENT:
 			CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
 			    __func__, ep);
 			__state_set(&ep->com, CLOSING);
 			break;
 
 		case FPDU_MODE:
 			CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
 			    __func__, ep);
 			START_EP_TIMER(ep);
 			__state_set(&ep->com, CLOSING);
 			attrs.next_state = C4IW_QP_STATE_CLOSING;
 			c4iw_modify_qp(ep->com.dev, ep->com.qp,
 					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
 			peer_close_upcall(ep);
 			break;
 
 		case ABORTING:
 			CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
 			    __func__, ep);
 			disconnect = 0;
 			break;
 
 		case CLOSING:
 			CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
 			    __func__, ep);
 			__state_set(&ep->com, MORIBUND);
 			disconnect = 0;
 			break;
 
 		case MORIBUND:
 			CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__,
 			    ep);
 			STOP_EP_TIMER(ep);
 			if (ep->com.cm_id && ep->com.qp) {
 				attrs.next_state = C4IW_QP_STATE_IDLE;
 				c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
 						C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
 			}
 			close_socket(&ep->com, 0);
 			close_complete_upcall(ep, 0);
 			__state_set(&ep->com, DEAD);
 			release = 1;
 			disconnect = 0;
 			break;
 
 		case DEAD:
 			CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
 			    __func__, ep);
 			disconnect = 0;
 			break;
 
 		default:
 			panic("%s: ep %p state %d", __func__, ep,
 			    ep->com.state);
 			break;
 	}
 
 	mutex_unlock(&ep->com.mutex);
 
 	if (disconnect) {
 
 		CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
 		c4iw_ep_disconnect(ep, 0, M_NOWAIT);
 	}
 	if (release) {
 
 		CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
 		c4iw_put_ep(&ep->com);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
 	return;
 }
 
 static void
 process_conn_error(struct c4iw_ep *ep)
 {
 	struct c4iw_qp_attributes attrs;
 	int ret;
 	int state;
 
 	state = state_read(&ep->com);
 	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
 	    __func__, ep, ep->com.so, ep->com.so->so_error,
 	    states[ep->com.state]);
 
 	switch (state) {
 
 		case MPA_REQ_WAIT:
 			STOP_EP_TIMER(ep);
 			break;
 
 		case MPA_REQ_SENT:
 			STOP_EP_TIMER(ep);
 			connect_reply_upcall(ep, -ECONNRESET);
 			break;
 
 		case MPA_REP_SENT:
 			ep->com.rpl_err = ECONNRESET;
 			CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
 			break;
 
 		case MPA_REQ_RCVD:
 
 			/*
 			 * We're gonna mark this puppy DEAD, but keep
 			 * the reference on it until the ULP accepts or
 			 * rejects the CR.
 			 */
 			c4iw_get_ep(&ep->com);
 			break;
 
 		case MORIBUND:
 		case CLOSING:
 			STOP_EP_TIMER(ep);
 			/*FALLTHROUGH*/
 		case FPDU_MODE:
 
 			if (ep->com.cm_id && ep->com.qp) {
 
 				attrs.next_state = C4IW_QP_STATE_ERROR;
 				ret = c4iw_modify_qp(ep->com.qp->rhp,
 					ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
 					&attrs, 1);
 				if (ret)
 					log(LOG_ERR,
 							"%s - qp <- error failed!\n",
 							__func__);
 			}
 			peer_abort_upcall(ep);
 			break;
 
 		case ABORTING:
 			break;
 
 		case DEAD:
 			CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
 			    __func__, ep->com.so->so_error);
 			return;
 
 		default:
 			panic("%s: ep %p state %d", __func__, ep, state);
 			break;
 	}
 
 	if (state != ABORTING) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep);
 		close_socket(&ep->com, 0);
 		state_set(&ep->com, DEAD);
 		c4iw_put_ep(&ep->com);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
 	return;
 }
 
 static void
 process_close_complete(struct c4iw_ep *ep)
 {
 	struct c4iw_qp_attributes attrs;
 	int release = 0;
 
 	CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
 	    ep->com.so, states[ep->com.state]);
 
 	/* The cm_id may be null if we failed to connect */
 	mutex_lock(&ep->com.mutex);
 
 	switch (ep->com.state) {
 
 		case CLOSING:
 			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
 			    __func__, ep);
 			__state_set(&ep->com, MORIBUND);
 			break;
 
 		case MORIBUND:
 			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__,
 			    ep);
 			STOP_EP_TIMER(ep);
 
 			if ((ep->com.cm_id) && (ep->com.qp)) {
 
 				CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE",
 				    __func__, ep);
 				attrs.next_state = C4IW_QP_STATE_IDLE;
 				c4iw_modify_qp(ep->com.dev,
 						ep->com.qp,
 						C4IW_QP_ATTR_NEXT_STATE,
 						&attrs, 1);
 			}
 
 			if (ep->parent_ep) {
 
 				CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep);
 				close_socket(&ep->com, 1);
 			}
 			else {
 
 				CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep);
 				close_socket(&ep->com, 0);
 			}
 			close_complete_upcall(ep, 0);
 			__state_set(&ep->com, DEAD);
 			release = 1;
 			break;
 
 		case ABORTING:
 			CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep);
 			break;
 
 		case DEAD:
 		default:
 			CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep);
 			panic("%s:pcc6 %p DEAD", __func__, ep);
 			break;
 	}
 	mutex_unlock(&ep->com.mutex);
 
 	if (release) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pcc7 %p", __func__, ep);
 		c4iw_put_ep(&ep->com);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
 	return;
 }
 
 static void
 init_sock(struct c4iw_ep_common *epc)
 {
 	int rc;
 	struct sockopt sopt;
 	struct socket *so = epc->so;
 	int on = 1;
 
 	SOCK_LOCK(so);
 	soupcall_set(so, SO_RCV, c4iw_so_upcall, epc);
 	so->so_state |= SS_NBIO;
 	SOCK_UNLOCK(so);
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = IPPROTO_TCP;
 	sopt.sopt_name = TCP_NODELAY;
 	sopt.sopt_val = (caddr_t)&on;
 	sopt.sopt_valsize = sizeof on;
 	sopt.sopt_td = NULL;
 	rc = sosetopt(so, &sopt);
 	if (rc) {
 		log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
 		    __func__, so, rc);
 	}
 }
 
 static void
 process_data(struct c4iw_ep *ep)
 {
 	struct sockaddr_in *local, *remote;
 
 	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__,
 	    ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv));
 
 	switch (state_read(&ep->com)) {
 	case MPA_REQ_SENT:
 		process_mpa_reply(ep);
 		break;
 	case MPA_REQ_WAIT:
 		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
 		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
 		ep->com.local_addr = *local;
 		ep->com.remote_addr = *remote;
 		free(local, M_SONAME);
 		free(remote, M_SONAME);
 		process_mpa_request(ep);
 		break;
 	default:
 		if (sbused(&ep->com.so->so_rcv))
 			log(LOG_ERR, "%s: Unexpected streaming data. ep %p, "
 			    "state %d, so %p, so_state 0x%x, sbused %u\n",
 			    __func__, ep, state_read(&ep->com), ep->com.so,
 			    ep->com.so->so_state, sbused(&ep->com.so->so_rcv));
 		break;
 	}
 }
 
 /*
  * Socket event for an endpoint in CONNECTING state.  If the socket is
  * connected and error-free, send the MPA request; otherwise report the
  * (negated) socket error via connect_reply_upcall(), close the socket, mark
  * the endpoint DEAD and drop a reference on it.
  */
 static void
 process_connected(struct c4iw_ep *ep)
 {
 
 	if (!(ep->com.so->so_state & SS_ISCONNECTED) || ep->com.so->so_error) {
 		connect_reply_upcall(ep, -ep->com.so->so_error);
 		close_socket(&ep->com, 0);
 		state_set(&ep->com, DEAD);
 		c4iw_put_ep(&ep->com);
 		return;
 	}
 
 	send_mpa_req(ep);
 }
 
 /*
  * Remove the first socket from the completed-connection queue (so_comp) of
  * the listening socket 'head' and prepare it for use by 'child_ep'.
  *
  * The dequeued socket gets a reference (soref), has c4iw_so_upcall installed
  * as its receive upcall with 'child_ep' as the argument, and is marked
  * SS_NBIO.  soaccept() is then called with 'remote' as the address output.
  *
  * Returns the dequeued socket, or NULL if the completed queue was empty.
  */
 static struct socket *
 dequeue_socket(struct socket *head, struct sockaddr_in **remote,
     struct c4iw_ep *child_ep)
 {
 	struct socket *so;
 
 	ACCEPT_LOCK();
 	so = TAILQ_FIRST(&head->so_comp);
 	if (!so) {
 		ACCEPT_UNLOCK();
 		return (NULL);
 	}
 	/* Detach the socket from the listener's completed queue. */
 	TAILQ_REMOVE(&head->so_comp, so, so_list);
 	head->so_qlen--;
 	SOCK_LOCK(so);
 	so->so_qstate &= ~SQ_COMP;
 	so->so_head = NULL;
 	soref(so);
 	soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep);
 	so->so_state |= SS_NBIO;
 	/* The socket lock is dropped before the accept lock. */
 	SOCK_UNLOCK(so);
 	ACCEPT_UNLOCK();
 	/* NOTE(review): the return value of soaccept() is not checked here. */
 	soaccept(so, (struct sockaddr **)remote);
 
 	return (so);
 }
 
 /*
  * Socket event for a listening endpoint: allocate a child endpoint, dequeue
  * one completed connection from the parent's socket, initialise the child
  * from the parent, and start waiting for the peer's MPA request.
  *
  * Allocation or dequeue failures are logged and the event is dropped.
  */
 static void
 process_newconn(struct c4iw_ep *parent_ep)
 {
 	struct socket *child_so;
 	struct c4iw_ep *child_ep;
 	struct sockaddr_in *remote;
 
 	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
 	if (!child_ep) {
 		CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM",
 		    __func__, parent_ep->com.so, parent_ep);
 		log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__);
 		return;
 	}
 
 	child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
 	if (!child_so) {
 		CTR4(KTR_IW_CXGBE,
 		    "%s: parent so %p, parent ep %p, child ep %p, dequeue err",
 		    __func__, parent_ep->com.so, parent_ep, child_ep);
 		log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__);
 		__free_ep(&child_ep->com);
 		return;
 	}
 
 	CTR5(KTR_IW_CXGBE,
 	    "%s: parent so %p, parent ep %p, child so %p, child ep %p",
 	     __func__, parent_ep->com.so, parent_ep, child_so, child_ep);
 
 	/*
 	 * NOTE(review): 'remote' is only valid if the soaccept() call made by
 	 * dequeue_socket() filled it in; that call's result is not checked.
 	 */
 	child_ep->com.local_addr = parent_ep->com.local_addr;
 	child_ep->com.remote_addr = *remote;
 	child_ep->com.dev = parent_ep->com.dev;
 	child_ep->com.so = child_so;
 	child_ep->com.cm_id = NULL;
 	child_ep->com.thread = parent_ep->com.thread;
 
 	free(remote, M_SONAME);
 
 	/* The child keeps a reference on its parent for as long as it points at it. */
 	c4iw_get_ep(&parent_ep->com);
 	child_ep->parent_ep = parent_ep;
 	init_timer(&child_ep->timer);
 	state_set(&child_ep->com, MPA_REQ_WAIT);
 	START_EP_TIMER(child_ep);
 
 	/* maybe the request has already been queued up on the socket... */
 	process_mpa_request(child_ep);
 }
 
 static int
 c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
 {
 	struct c4iw_ep *ep = arg;
 
 	spin_lock(&req_lock);
 
 	CTR6(KTR_IW_CXGBE,
 	    "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
 	    __func__, so, so->so_state, ep, states[ep->com.state],
 	    ep->com.entry.tqe_prev);
 
 	if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
 		KASSERT(ep->com.so == so, ("%s: XXX review.", __func__));
 		c4iw_get_ep(&ep->com);
 		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
 		queue_work(c4iw_taskq, &c4iw_task);
 	}
 
 	spin_unlock(&req_lock);
 	return (SU_OK);
 }
 
 static void
 process_socket_event(struct c4iw_ep *ep)
 {
 	int state = state_read(&ep->com);
 	struct socket *so = ep->com.so;
 
 	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
 	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
 	    so->so_error, so->so_rcv.sb_state, ep, states[state]);
 
 	if (state == CONNECTING) {
 		process_connected(ep);
 		return;
 	}
 
 	if (state == LISTEN) {
 		process_newconn(ep);
 		return;
 	}
 
 	/* connection error */
 	if (so->so_error) {
 		process_conn_error(ep);
 		return;
 	}
 
 	/* peer close */
 	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
 		process_peer_close(ep);
 		return;
 	}
 
 	/* close complete */
 	if (so->so_state & SS_ISDISCONNECTED) {
 		process_close_complete(ep);
 		return;
 	}
 
 	/* rx data */
 	process_data(ep);
 }
 
 /*
  * hw.iw_cxgbe sysctl tree: tunables for the iWARP connection manager.  Each
  * knob is a plain int exported read/write (and as a loader tunable) via
  * CTLFLAG_RWTUN; the description strings state the compiled-in defaults.
  */
 SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters");
 
 int db_delay_usecs = 1;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_delay_usecs, CTLFLAG_RWTUN, &db_delay_usecs, 0,
 		"Usecs to delay awaiting db fifo to drain");
 
 static int dack_mode = 1;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0,
 		"Delayed ack mode (default = 1)");
 
 int c4iw_max_read_depth = 8;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0,
 		"Per-connection max ORD/IRD (default = 8)");
 
 static int enable_tcp_timestamps;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RWTUN, &enable_tcp_timestamps, 0,
 		"Enable tcp timestamps (default = 0)");
 
 static int enable_tcp_sack;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RWTUN, &enable_tcp_sack, 0,
 		"Enable tcp SACK (default = 0)");
 
 static int enable_tcp_window_scaling = 1;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RWTUN, &enable_tcp_window_scaling, 0,
 		"Enable tcp window scaling (default = 1)");
 
 /* The description previously claimed a default of 0; the initializer is 1. */
 int c4iw_debug = 1;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0,
 		"Enable debug logging (default = 1)");
 
 static int peer2peer;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0,
 		"Support peer2peer ULPs (default = 0)");
 
 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0,
 		"RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)");
 
 static int ep_timeout_secs = 60;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0,
 		"CM Endpoint operation timeout in seconds (default = 60)");
 
 static int mpa_rev = 1;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0,
 		"MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");
 
 static int markers_enabled;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0,
 		"Enable MPA MARKERS (default(0) = disabled)");
 
 static int crc_enabled = 1;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0,
 		"Enable MPA CRC (default(1) = enabled)");
 
 static int rcv_win = 256 * 1024;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0,
 		"TCP receive window in bytes (default = 256KB)");
 
 static int snd_win = 128 * 1024;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0,
 		"TCP send window in bytes (default = 128KB)");
 
 /* The description was previously cut off after "automatic". */
 int db_fc_threshold = 2000;
 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_fc_threshold, CTLFLAG_RWTUN, &db_fc_threshold, 0,
 		"QP count/threshold that triggers automatic db flow control mode (default = 2000)");
 
 /*
  * Arm the endpoint's timer to fire ep_timeout() after ep_timeout_secs
  * seconds.  Arming clears the TIMEOUT flag and takes a reference on the
  * endpoint.  If the timer is already pending, an error is logged and nothing
  * else is done.
  */
 static void
 start_ep_timer(struct c4iw_ep *ep)
 {
 
 	if (!timer_pending(&ep->timer)) {
 		clear_bit(TIMEOUT, &ep->com.flags);
 		c4iw_get_ep(&ep->com);
 		ep->timer.expires = jiffies + ep_timeout_secs * HZ;
 		ep->timer.data = (unsigned long)ep;
 		ep->timer.function = ep_timeout;
 		add_timer(&ep->timer);
 	} else {
 		CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
 		printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
 		    ep);
 	}
 }
 
 /*
  * Cancel the endpoint's timer.  If the TIMEOUT flag was not already set,
  * set it and drop one endpoint reference.
  */
 static void
 stop_ep_timer(struct c4iw_ep *ep)
 {
 
 	del_timer_sync(&ep->timer);
 
 	/* TIMEOUT already set: nothing further to release. */
 	if (test_and_set_bit(TIMEOUT, &ep->com.flags))
 		return;
 
 	c4iw_put_ep(&ep->com);
 }
 
 static enum
 c4iw_ep_state state_read(struct c4iw_ep_common *epc)
 {
 	enum c4iw_ep_state state;
 
 	mutex_lock(&epc->mutex);
 	state = epc->state;
 	mutex_unlock(&epc->mutex);
 
 	return (state);
 }
 
 /*
  * Store a new endpoint state without taking the endpoint mutex; see
  * state_set() for the variant that locks.
  */
 static void
 __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
 {
 	epc->state = new;
 }
 
 /*
  * Store a new endpoint state while holding the endpoint mutex.
  */
 static void
 state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
 {
 
 	mutex_lock(&epc->mutex);
 	epc->state = new;	/* same store as __state_set() */
 	mutex_unlock(&epc->mutex);
 }
 
 /*
  * Allocate a zeroed endpoint of 'size' bytes with allocation flags 'gfp' and
  * initialise the common part: reference count, mutex and wr_wait.
  *
  * Returns the new endpoint, or NULL if the allocation failed.
  */
 static void *
 alloc_ep(int size, gfp_t gfp)
 {
 	struct c4iw_ep_common *epc = kzalloc(size, gfp);
 
 	if (epc != NULL) {
 		kref_init(&epc->kref);
 		mutex_init(&epc->mutex);
 		c4iw_init_wr_wait(&epc->wr_wait);
 	}
 
 	return (epc);
 }
 
 /*
  * Free an endpoint directly, bypassing the reference count.  The endpoint
  * must not have a socket attached and must not be on the request list; both
  * conditions are asserted.
  *
  * Endpoints come from kzalloc() in alloc_ep(), so they are released with
  * kfree() (as _c4iw_free_ep() does) rather than free(9) with a different
  * malloc type.
  */
 void
 __free_ep(struct c4iw_ep_common *epc)
 {
 	CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc);
 	KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so));
 	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __func__, epc));
 	kfree(epc);
 	/* Only the pointer value is traced below; the memory is gone. */
 	CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc);
 }
 
 /*
  * kref release callback: recover the endpoint that embeds 'kref', assert it
  * has no socket and is not on the request list, and free it.
  */
 void
 _c4iw_free_ep(struct kref *kref)
 {
 	struct c4iw_ep *ep = container_of(kref, struct c4iw_ep, com.kref);
 	struct c4iw_ep_common *epc = &ep->com;
 
 	KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so));
 	KASSERT(!epc->entry.tqe_prev,
 	    ("%s epc %p still on req list", __func__, epc));
 
 	kfree(ep);
 }
 
 /*
  * Flag the endpoint with RELEASE_RESOURCES and drop one reference on it.
  */
 static void
 release_ep_resources(struct c4iw_ep *ep)
 {
 
 	CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
 
 	set_bit(RELEASE_RESOURCES, &ep->com.flags);
 	c4iw_put_ep(&ep->com);
 
 	CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep);
 }
 
 /*
  * Build and send the MPA start request on the endpoint's socket.
  *
  * The revision used is the mpa_rev tunable, or 1 when the endpoint asked for
  * a retry with MPA v1.  For revision 2 a struct mpa_v2_conn_params carrying
  * IRD/ORD (and the peer2peer/RTR flags) is placed ahead of the caller's
  * private data.  On success the endpoint timer is started and the state
  * moves to MPA_REQ_SENT.
  *
  * On any failure connect_reply_upcall() is invoked with -ENOMEM.
  * NOTE(review): that includes sosend() failures, whose real error code is
  * not propagated.
  */
 static void
 send_mpa_req(struct c4iw_ep *ep)
 {
 	int mpalen;
 	struct mpa_message *mpa;
 	struct mpa_v2_conn_params mpa_v2_params;
 	struct mbuf *m;
 	char mpa_rev_to_use = mpa_rev;
 	int err;
 
 	if (ep->retry_with_mpa_v1)
 		mpa_rev_to_use = 1;
 	mpalen = sizeof(*mpa) + ep->plen;
 	if (mpa_rev_to_use == 2)
 		mpalen += sizeof(struct mpa_v2_conn_params);
 
 	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
 	if (mpa == NULL) {
 failed:
 		connect_reply_upcall(ep, -ENOMEM);
 		return;
 	}
 
 	memset(mpa, 0, mpalen);
 	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
 	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
 		(markers_enabled ? MPA_MARKERS : 0) |
 		(mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
 	mpa->private_data_size = htons(ep->plen);
 	mpa->revision = mpa_rev_to_use;
 
 	if (mpa_rev_to_use == 1) {
 		ep->tried_with_mpa_v1 = 1;
 		ep->retry_with_mpa_v1 = 0;
 	}
 
 	if (mpa_rev_to_use == 2) {
 		/*
 		 * Do the addition in host byte order.  Adding two htons()
 		 * results yields a wrong length on little-endian hosts
 		 * whenever the low byte carries.
 		 */
 		mpa->private_data_size =
 		    htons(ntohs(mpa->private_data_size) +
 		    sizeof(struct mpa_v2_conn_params));
 		mpa_v2_params.ird = htons((u16)ep->ird);
 		mpa_v2_params.ord = htons((u16)ep->ord);
 
 		if (peer2peer) {
 			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
 
 			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
 				mpa_v2_params.ord |=
 				    htons(MPA_V2_RDMA_WRITE_RTR);
 			} else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
 				mpa_v2_params.ord |=
 					htons(MPA_V2_RDMA_READ_RTR);
 			}
 		}
 		memcpy(mpa->private_data, &mpa_v2_params,
 			sizeof(struct mpa_v2_conn_params));
 
 		/* Caller's private data follows the v2 parameters. */
 		if (ep->plen) {
 
 			memcpy(mpa->private_data +
 				sizeof(struct mpa_v2_conn_params),
 				ep->mpa_pkt + sizeof(*mpa), ep->plen);
 		}
 	} else {
 
 		if (ep->plen)
 			memcpy(mpa->private_data,
 					ep->mpa_pkt + sizeof(*mpa), ep->plen);
 		CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep);
 	}
 
 	/* Copy the assembled message into an mbuf chain for sosend(). */
 	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		free(mpa, M_CXGBE);
 		goto failed;
 	}
 	m_copyback(m, 0, mpalen, (void *)mpa);
 	free(mpa, M_CXGBE);
 
 	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
 	    ep->com.thread);
 	if (err)
 		goto failed;
 
 	START_EP_TIMER(ep);
 	state_set(&ep->com, MPA_REQ_SENT);
 	ep->mpa_attr.initiator = 1;
 }
 
 /*
  * Build and send an MPA reply with MPA_REJECT set, carrying 'plen' bytes of
  * private data from 'pdata'.  When the peer negotiated MPA v2 with enhanced
  * RDMA connection setup, a struct mpa_v2_conn_params precedes the private
  * data.
  *
  * Returns 0 on success, -ENOMEM if a buffer could not be allocated, or the
  * negated sosend() error.  ep->snd_seq is advanced by the message length
  * only when the send succeeds.
  */
 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
 {
 	int mpalen ;
 	struct mpa_message *mpa;
 	struct mpa_v2_conn_params mpa_v2_params;
 	struct mbuf *m;
 	int err;
 
 	CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid,
 	    ep->plen);
 
 	mpalen = sizeof(*mpa) + plen;
 
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
 
 		mpalen += sizeof(struct mpa_v2_conn_params);
 		CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep,
 		    ep->mpa_attr.version, mpalen);
 	}
 
 	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
 	if (mpa == NULL)
 		return (-ENOMEM);
 
 	memset(mpa, 0, mpalen);
 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 	mpa->flags = MPA_REJECT;
 	mpa->revision = mpa_rev;
 	mpa->private_data_size = htons(plen);
 
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
 
 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
 		/*
 		 * Do the addition in host byte order.  Adding two htons()
 		 * results yields a wrong length on little-endian hosts
 		 * whenever the low byte carries.
 		 */
 		mpa->private_data_size =
 		    htons(ntohs(mpa->private_data_size) +
 		    sizeof(struct mpa_v2_conn_params));
 		mpa_v2_params.ird = htons(((u16)ep->ird) |
 				(peer2peer ? MPA_V2_PEER2PEER_MODEL :
 				 0));
 		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
 					(p2p_type ==
 					 FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
 					 MPA_V2_RDMA_WRITE_RTR : p2p_type ==
 					 FW_RI_INIT_P2PTYPE_READ_REQ ?
 					 MPA_V2_RDMA_READ_RTR : 0) : 0));
 		memcpy(mpa->private_data, &mpa_v2_params,
 				sizeof(struct mpa_v2_conn_params));
 
 		/*
 		 * Gate the copy on the length of the data being sent (plen),
 		 * not on the length of the request received (ep->plen).
 		 */
 		if (plen)
 			memcpy(mpa->private_data +
 					sizeof(struct mpa_v2_conn_params), pdata, plen);
 		CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
 		    mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
 	} else
 		if (plen)
 			memcpy(mpa->private_data, pdata, plen);
 
 	/* Copy the assembled message into an mbuf chain for sosend(). */
 	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		free(mpa, M_CXGBE);
 		return (-ENOMEM);
 	}
 	m_copyback(m, 0, mpalen, (void *)mpa);
 	free(mpa, M_CXGBE);
 
 	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
 	if (!err)
 		ep->snd_seq += mpalen;
 	CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err);
 	return err;
 }
 
 /*
  * Build and send the MPA start reply that accepts a connection, carrying
  * 'plen' bytes of private data from 'pdata'.  When the peer negotiated MPA
  * v2 with enhanced RDMA connection setup, a struct mpa_v2_conn_params with
  * IRD/ORD (and the peer2peer/RTR flags) precedes the private data.
  *
  * The endpoint state is moved to MPA_REP_SENT and ep->snd_seq is advanced
  * before the send is attempted.
  *
  * Returns 0 on success, -ENOMEM if a buffer could not be allocated, or the
  * negated sosend() error.
  */
 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
 {
 	int mpalen;
 	struct mpa_message *mpa;
 	struct mbuf *m;
 	struct mpa_v2_conn_params mpa_v2_params;
 	int err;
 
 	CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep);
 
 	mpalen = sizeof(*mpa) + plen;
 
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
 
 		CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep,
 		    ep->mpa_attr.version);
 		mpalen += sizeof(struct mpa_v2_conn_params);
 	}
 
 	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
 	if (mpa == NULL)
 		return (-ENOMEM);
 
 	/*
 	 * Zero the whole message, not just the header, so no uninitialized
 	 * heap bytes can ever reach the wire.
 	 */
 	memset(mpa, 0, mpalen);
 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
 	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
 		(markers_enabled ? MPA_MARKERS : 0);
 	mpa->revision = ep->mpa_attr.version;
 	mpa->private_data_size = htons(plen);
 
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
 
 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
 		/*
 		 * Do the addition in host byte order.  Adding two htons()
 		 * results yields a wrong length on little-endian hosts
 		 * whenever the low byte carries.
 		 */
 		mpa->private_data_size =
 		    htons(ntohs(mpa->private_data_size) +
 		    sizeof(struct mpa_v2_conn_params));
 		mpa_v2_params.ird = htons((u16)ep->ird);
 		mpa_v2_params.ord = htons((u16)ep->ord);
 		CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep,
 		    ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord);
 
 		if (peer2peer && (ep->mpa_attr.p2p_type !=
 			FW_RI_INIT_P2PTYPE_DISABLED)) {
 
 			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
 
 			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
 
 				mpa_v2_params.ord |=
 					htons(MPA_V2_RDMA_WRITE_RTR);
 				CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d",
 				    __func__, ep, p2p_type, mpa_v2_params.ird,
 				    mpa_v2_params.ord);
 			}
 			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
 
 				mpa_v2_params.ord |=
 					htons(MPA_V2_RDMA_READ_RTR);
 				CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d",
 				    __func__, ep, p2p_type, mpa_v2_params.ird,
 				    mpa_v2_params.ord);
 			}
 		}
 
 		memcpy(mpa->private_data, &mpa_v2_params,
 			sizeof(struct mpa_v2_conn_params));
 
 		/*
 		 * Gate the copy on the length of the data being sent (plen),
 		 * not on the length of the request received (ep->plen).
 		 */
 		if (plen)
 			memcpy(mpa->private_data +
 				sizeof(struct mpa_v2_conn_params), pdata, plen);
 	} else
 		if (plen)
 			memcpy(mpa->private_data, pdata, plen);
 
 	/* Copy the assembled message into an mbuf chain for sosend(). */
 	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		free(mpa, M_CXGBE);
 		return (-ENOMEM);
 	}
 	m_copyback(m, 0, mpalen, (void *)mpa);
 	free(mpa, M_CXGBE);
 
 
 	state_set(&ep->com, MPA_REP_SENT);
 	ep->snd_seq += mpalen;
 	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
 			ep->com.thread);
 	CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err);
 	return err;
 }
 
 
 
 /*
  * Deliver IW_CM_EVENT_CLOSE with the given status to the endpoint's cm_id,
  * if it still has one.  After the event is delivered the cm_id reference is
  * dropped and the endpoint's cm_id and qp pointers are cleared.
  */
 static void close_complete_upcall(struct c4iw_ep *ep, int status)
 {
 	struct iw_cm_event event;
 
 	CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CLOSE;
 	event.status = status;
 
 	if (ep->com.cm_id) {
 
 		/* The trace format previously read "%1" instead of "%p". */
 		CTR2(KTR_IW_CXGBE, "%s:ccu1 %p", __func__, ep);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 		ep->com.cm_id->rem_ref(ep->com.cm_id);
 		ep->com.cm_id = NULL;
 		ep->com.qp = NULL;
 		set_bit(CLOSE_UPCALL, &ep->com.history);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
 }
 
 /*
  * Abort a connection: move the endpoint to ABORTING, abort and then close
  * its socket, and record ABORT_CONN in the endpoint history.
  *
  * Returns the result of close_socket().
  */
 static int
 abort_connection(struct c4iw_ep *ep)
 {
 	int rc;
 
 	CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep);
 
 	state_set(&ep->com, ABORTING);
 	abort_socket(ep);
 	rc = close_socket(&ep->com, 0);
 	set_bit(ABORT_CONN, &ep->com.history);
 
 	CTR2(KTR_IW_CXGBE, "%s:abE %p", __func__, ep);
 	return (rc);
 }
 
 static void peer_close_upcall(struct c4iw_ep *ep)
 {
 	struct iw_cm_event event;
 
 	CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_DISCONNECT;
 
 	if (ep->com.cm_id) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 		set_bit(DISCONN_UPCALL, &ep->com.history);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep);
 }
 
 static void peer_abort_upcall(struct c4iw_ep *ep)
 {
 	struct iw_cm_event event;
 
 	CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CLOSE;
 	event.status = -ECONNRESET;
 
 	if (ep->com.cm_id) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 		ep->com.cm_id->rem_ref(ep->com.cm_id);
 		ep->com.cm_id = NULL;
 		ep->com.qp = NULL;
 		set_bit(ABORT_UPCALL, &ep->com.history);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep);
 }
 
 /*
  * Deliver IW_CM_EVENT_CONNECT_REPLY with the given status to the endpoint's
  * cm_id, if it has one.
  *
  * -ECONNABORTED is reported upward as -ECONNRESET, and in that case the
  * function returns right after the upcall without releasing the cm_id.
  * For a status of 0 or -ECONNREFUSED the peer's private data (past the MPA
  * header, and past the v2 parameters when MPA v1 was not used) is attached
  * to the event.  For any other negative status the cm_id reference is
  * dropped and the endpoint's cm_id and qp pointers are cleared.
  */
 static void connect_reply_upcall(struct c4iw_ep *ep, int status)
 {
 	struct iw_cm_event event;
 
 	CTR3(KTR_IW_CXGBE, "%s:cruB %p %d", __func__, ep, status);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CONNECT_REPLY;
 	event.status = (status ==-ECONNABORTED)?-ECONNRESET: status;
 	event.local_addr = ep->com.local_addr;
 	event.remote_addr = ep->com.remote_addr;
 
 	if ((status == 0) || (status == -ECONNREFUSED)) {
 
 		if (!ep->tried_with_mpa_v1) {
 
 			CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
 			/* this means MPA_v2 is used */
 			/*
 			 * NOTE(review): this subtraction assumes ep->plen
 			 * already includes the v2 parameter block; confirm
 			 * that holds on every path reaching here.
 			 */
 			event.private_data_len = ep->plen -
 				sizeof(struct mpa_v2_conn_params);
 			event.private_data = ep->mpa_pkt +
 				sizeof(struct mpa_message) +
 				sizeof(struct mpa_v2_conn_params);
 		} else {
 
 			CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
 			/* this means MPA_v1 is used */
 			event.private_data_len = ep->plen;
 			event.private_data = ep->mpa_pkt +
 				sizeof(struct mpa_message);
 		}
 	}
 
 	if (ep->com.cm_id) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep);
 		set_bit(CONN_RPL_UPCALL, &ep->com.history);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 	}
 
 	if(status == -ECONNABORTED) {
 
 		CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status);
 		return;
 	}
 
 	if (status < 0) {
 
 		CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status);
 		/* cm_id may already be gone; only drop a reference we hold. */
 		if (ep->com.cm_id) {
 			ep->com.cm_id->rem_ref(ep->com.cm_id);
 			ep->com.cm_id = NULL;
 		}
 		ep->com.qp = NULL;
 	}
 
 	CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep);
 }
 
 static int connect_request_upcall(struct c4iw_ep *ep)
 {
 	struct iw_cm_event event;
 	int ret;
 
 	CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep,
 	    ep->tried_with_mpa_v1);
 
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
 	event.local_addr = ep->com.local_addr;
 	event.remote_addr = ep->com.remote_addr;
 	event.provider_data = ep;
 	event.so = ep->com.so;
 
 	if (!ep->tried_with_mpa_v1) {
 		/* this means MPA_v2 is used */
 		event.ord = ep->ord;
 		event.ird = ep->ird;
 		event.private_data_len = ep->plen -
 			sizeof(struct mpa_v2_conn_params);
 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
 			sizeof(struct mpa_v2_conn_params);
 	} else {
 
 		/* this means MPA_v1 is used. Send max supported */
 		event.ord = c4iw_max_read_depth;
 		event.ird = c4iw_max_read_depth;
 		event.private_data_len = ep->plen;
 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
 	}
 
 	c4iw_get_ep(&ep->com);
 	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
 	    &event);
 	if(ret)
 		c4iw_put_ep(&ep->com);
 
 	set_bit(CONNREQ_UPCALL, &ep->com.history);
 	c4iw_put_ep(&ep->parent_ep->com);
 	return ret;
 }
 
 static void established_upcall(struct c4iw_ep *ep)
 {
 	struct iw_cm_event event;
 
 	CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep);
 	memset(&event, 0, sizeof(event));
 	event.event = IW_CM_EVENT_ESTABLISHED;
 	event.ird = ep->ird;
 	event.ord = ep->ord;
 
 	if (ep->com.cm_id) {
 
 		CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep);
 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
 		set_bit(ESTAB_UPCALL, &ep->com.history);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep);
 }
 
 
 
 /*
  * Active side: receive and process the peer's MPA start reply.
  *
  * Received data is accumulated in ep->mpa_pkt.  Until the complete message
  * (header plus advertised private data) has arrived the function simply
  * returns and is expected to be called again when more data shows up.  Once
  * the message is complete and valid, the negotiated MPA attributes are
  * recorded, the QP is moved to RTS, and the result is reported through
  * connect_reply_upcall().  A TERM is generated after RTS when the peer's RTR
  * requirement or IRD/ORD cannot be honoured.
  *
  * Every failure after the receive aborts the connection and reports a
  * negative status through connect_reply_upcall().
  */
 static void process_mpa_reply(struct c4iw_ep *ep)
 {
 	struct mpa_message *mpa;
 	struct mpa_v2_conn_params *mpa_v2_params;
 	u16 plen;
 	u16 resp_ird, resp_ord;
 	u8 rtr_mismatch = 0, insuff_ird = 0;
 	struct c4iw_qp_attributes attrs;
 	enum c4iw_qp_attr_mask mask;
 	int err;
 	struct mbuf *top, *m;
 	int flags = MSG_DONTWAIT;
 	struct uio uio;
 
 	CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep);
 
 	/*
 	 * Stop mpa timer.  If it expired, then the state has
 	 * changed and we bail since ep_timeout already aborted
 	 * the connection.
 	 */
 	STOP_EP_TIMER(ep);
 	if (state_read(&ep->com) != MPA_REQ_SENT)
 		return;
 
 	uio.uio_resid = 1000000;
 	uio.uio_td = ep->com.thread;
 	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
 
 	if (err) {
 
 		if (err == EWOULDBLOCK) {
 
 			/* Nothing to read yet: re-arm the timer and wait. */
 			CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep);
 			START_EP_TIMER(ep);
 			return;
 		}
 		err = -err;
 		CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep);
 		goto err;
 	}
 
 	if (ep->com.so->so_rcv.sb_mb) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep);
 		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
 		       __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
 	}
 
 	m = top;
 
 	do {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep);
 		/*
 		 * If we get more than the supported amount of private data
 		 * then we must fail this connection.
 		 */
 		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
 
 			CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep,
 			    ep->mpa_pkt_len + m->m_len);
 			err = (-EINVAL);
 			/* Don't leak the received chain on this error path. */
 			m_freem(top);
 			goto err;
 		}
 
 		/*
 		 * copy the new data into our accumulation buffer.
 		 */
 		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
 		ep->mpa_pkt_len += m->m_len;
 		if (!m->m_next)
 			m = m->m_nextpkt;
 		else
 			m = m->m_next;
 	} while (m);
 
 	m_freem(top);
 	/*
 	 * if we don't even have the mpa message, then bail.
 	 */
 	if (ep->mpa_pkt_len < sizeof(*mpa))
 		return;
 	mpa = (struct mpa_message *) ep->mpa_pkt;
 
 	/* Validate MPA header. */
 	if (mpa->revision > mpa_rev) {
 
 		CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep,
 		    mpa->revision, mpa_rev);
 		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, "
 				" Received = %d\n", __func__, mpa_rev, mpa->revision);
 		err = -EPROTO;
 		goto err;
 	}
 
 	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep);
 		err = -EPROTO;
 		goto err;
 	}
 
 	plen = ntohs(mpa->private_data_size);
 
 	/*
 	 * Fail if there's too much private data.
 	 */
 	if (plen > MPA_MAX_PRIVATE_DATA) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep);
 		err = -EPROTO;
 		goto err;
 	}
 
 	/*
 	 * If plen does not account for pkt size
 	 */
 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep);
 		err = -EPROTO;
 		goto err;
 	}
 
 	ep->plen = (u8) plen;
 
 	/*
 	 * If we don't have all the pdata yet, then bail.
 	 * We'll continue process when more data arrives.
 	 */
 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep);
 		return;
 	}
 
 	if (mpa->flags & MPA_REJECT) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep);
 		err = -ECONNREFUSED;
 		goto err;
 	}
 
 	/*
 	 * If we get here we have accumulated the entire mpa
 	 * start reply message including private data. And
 	 * the MPA header is valid.
 	 */
 	state_set(&ep->com, FPDU_MODE);
 	/* CRC is used if either side asks for it. */
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
 	ep->mpa_attr.version = mpa->revision;
 	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
 
 	if (mpa->revision == 2) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep);
 		ep->mpa_attr.enhanced_rdma_conn =
 			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
 
 		if (ep->mpa_attr.enhanced_rdma_conn) {
 
 			CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep);
 			mpa_v2_params = (struct mpa_v2_conn_params *)
 				(ep->mpa_pkt + sizeof(*mpa));
 			resp_ird = ntohs(mpa_v2_params->ird) &
 				MPA_V2_IRD_ORD_MASK;
 			resp_ord = ntohs(mpa_v2_params->ord) &
 				MPA_V2_IRD_ORD_MASK;
 
 			/*
 			 * This is a double-check. Ideally, below checks are
 			 * not required since ird/ord stuff has been taken
 			 * care of in c4iw_accept_cr
 			 */
 			if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
 
 				CTR2(KTR_IW_CXGBE, "%s:pmre %p", __func__, ep);
 				err = -ENOMEM;
 				ep->ird = resp_ord;
 				ep->ord = resp_ird;
 				insuff_ird = 1;
 			}
 
 			if (ntohs(mpa_v2_params->ird) &
 				MPA_V2_PEER2PEER_MODEL) {
 
 				CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep);
 				if (ntohs(mpa_v2_params->ord) &
 					MPA_V2_RDMA_WRITE_RTR) {
 
 					CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep);
 					ep->mpa_attr.p2p_type =
 						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
 				}
 				else if (ntohs(mpa_v2_params->ord) &
 					MPA_V2_RDMA_READ_RTR) {
 
 					CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep);
 					ep->mpa_attr.p2p_type =
 						FW_RI_INIT_P2PTYPE_READ_REQ;
 				}
 			}
 		}
 	} else {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep);
 
 		if (mpa->revision == 1) {
 
 			CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep);
 
 			if (peer2peer) {
 
 				CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep);
 				ep->mpa_attr.p2p_type = p2p_type;
 			}
 		}
 	}
 
 	if (set_tcpinfo(ep)) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep);
 		printf("%s set_tcpinfo error\n", __func__);
 		/*
 		 * err may still be 0 here; without an explicit failure code
 		 * the aborted connection would be reported as a success.
 		 */
 		err = -ECONNRESET;
 		goto err;
 	}
 
 	CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, "
 	    "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__,
 	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
 	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
 	    ep->mpa_attr.p2p_type);
 
 	/*
 	 * If responder's RTR does not match with that of initiator, assign
 	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
 	 * generated when moving QP to RTS state.
 	 * A TERM message will be sent after QP has moved to RTS state
 	 */
 	if ((ep->mpa_attr.version == 2) && peer2peer &&
 		(ep->mpa_attr.p2p_type != p2p_type)) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep);
 		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
 		rtr_mismatch = 1;
 	}
 
 
 	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
 	attrs.max_ord = ep->ord;
 	attrs.llp_stream_handle = ep;
 	attrs.next_state = C4IW_QP_STATE_RTS;
 
 	mask = C4IW_QP_ATTR_NEXT_STATE |
 		C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
 		C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
 
 	/* bind QP and TID with INIT_WR */
 	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
 
 	if (err) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep);
 		goto err;
 	}
 
 	/*
 	 * If responder's RTR requirement did not match with what initiator
 	 * supports, generate TERM message
 	 */
 	if (rtr_mismatch) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep);
 		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
 		attrs.layer_etype = LAYER_MPA | DDP_LLP;
 		attrs.ecode = MPA_NOMATCH_RTR;
 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
 		/* The TERM transition result is overridden with -ENOMEM. */
 		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
 			C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
 		err = -ENOMEM;
 		goto out;
 	}
 
 	/*
 	 * Generate TERM if initiator IRD is not sufficient for responder
 	 * provided ORD. Currently, we do the same behaviour even when
 	 * responder provided IRD is also not sufficient as regards to
 	 * initiator ORD.
 	 */
 	if (insuff_ird) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep);
 		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
 				__func__);
 		attrs.layer_etype = LAYER_MPA | DDP_LLP;
 		attrs.ecode = MPA_INSUFF_IRD;
 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
 		/* The TERM transition result is overridden with -ENOMEM. */
 		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
 			C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
 		err = -ENOMEM;
 		goto out;
 	}
 	goto out;
 err:
 	/* Failure: tear the connection down before reporting upward. */
 	state_set(&ep->com, ABORTING);
 	abort_connection(ep);
 out:
 	connect_reply_upcall(ep, err);
 	CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep);
 	return;
 }
 
 /*
  * Passive side: receive and process the peer's MPA start request.
  *
  * Only acts while the endpoint is in MPA_REQ_WAIT.  Data is read straight
  * into ep->mpa_pkt at the current fill offset; until the complete message
  * (header plus advertised private data) has arrived the function returns
  * and is expected to be called again when more data shows up.  Once the
  * request is complete and valid, the negotiated MPA attributes are recorded,
  * the state moves to MPA_REQ_RCVD, the endpoint timer is stopped, and the
  * connect request is handed to the parent (listening) endpoint.
  *
  * Any validation or receive failure (other than EAGAIN) stops the timer and
  * aborts the connection.
  */
 static void
 process_mpa_request(struct c4iw_ep *ep)
 {
 	struct mpa_message *mpa;
 	u16 plen;
 	int flags = MSG_DONTWAIT;
 	int rc;
 	struct iovec iov;
 	struct uio uio;
 	enum c4iw_ep_state state = state_read(&ep->com);
 
 	CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]);
 
 	if (state != MPA_REQ_WAIT)
 		return;
 
 	/* Read into whatever room is left in the accumulation buffer. */
 	iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len];
 	iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = 0;
 	uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_READ;
 	uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */
 
 	rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags);
 	if (rc == EAGAIN)
 		return;
 	else if (rc) {
 		/* Shared failure exit; later validation steps jump here too. */
 abort:
 		STOP_EP_TIMER(ep);
 		abort_connection(ep);
 		return;
 	}
 	KASSERT(uio.uio_offset > 0, ("%s: sorecieve on so %p read no data",
 	    __func__, ep->com.so));
 	/* uio_offset started at 0, so it now holds the byte count read. */
 	ep->mpa_pkt_len += uio.uio_offset;
 
 	/*
 	 * If we get more than the supported amount of private data then we must
 	 * fail this connection.  XXX: check so_rcv->sb_cc, or peek with another
 	 * soreceive, or increase the size of mpa_pkt by 1 and abort if the last
 	 * byte is filled by the soreceive above.
 	 */
 
 	/* Don't even have the MPA message.  Wait for more data to arrive. */
 	if (ep->mpa_pkt_len < sizeof(*mpa))
 		return;
 	mpa = (struct mpa_message *) ep->mpa_pkt;
 
 	/*
 	 * Validate MPA Header.
 	 */
 	if (mpa->revision > mpa_rev) {
 		log(LOG_ERR, "%s: MPA version mismatch. Local = %d,"
 		    " Received = %d\n", __func__, mpa_rev, mpa->revision);
 		goto abort;
 	}
 
 	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
 		goto abort;
 
 	/*
 	 * Fail if there's too much private data.
 	 */
 	plen = ntohs(mpa->private_data_size);
 	if (plen > MPA_MAX_PRIVATE_DATA)
 		goto abort;
 
 	/*
 	 * If plen does not account for pkt size
 	 */
 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
 		goto abort;
 
 	ep->plen = (u8) plen;
 
 	/*
 	 * If we don't have all the pdata yet, then bail.
 	 */
 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
 		return;
 
 	/*
 	 * If we get here we have accumulated the entire mpa
 	 * start request message including private data.
 	 */
 	ep->mpa_attr.initiator = 0;
 	/* CRC is used if either side asks for it. */
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
 	ep->mpa_attr.version = mpa->revision;
 	if (mpa->revision == 1)
 		ep->tried_with_mpa_v1 = 1;
 	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
 
 	if (mpa->revision == 2) {
 		ep->mpa_attr.enhanced_rdma_conn =
 		    mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
 		if (ep->mpa_attr.enhanced_rdma_conn) {
 			struct mpa_v2_conn_params *mpa_v2_params;
 			u16 ird, ord;
 
 			/* The v2 parameters sit right after the MPA header. */
 			mpa_v2_params = (void *)&ep->mpa_pkt[sizeof(*mpa)];
 			ird = ntohs(mpa_v2_params->ird);
 			ord = ntohs(mpa_v2_params->ord);
 
 			ep->ird = ird & MPA_V2_IRD_ORD_MASK;
 			ep->ord = ord & MPA_V2_IRD_ORD_MASK;
 			if (ird & MPA_V2_PEER2PEER_MODEL && peer2peer) {
 				if (ord & MPA_V2_RDMA_WRITE_RTR) {
 					ep->mpa_attr.p2p_type =
 					    FW_RI_INIT_P2PTYPE_RDMA_WRITE;
 				} else if (ord & MPA_V2_RDMA_READ_RTR) {
 					ep->mpa_attr.p2p_type =
 					    FW_RI_INIT_P2PTYPE_READ_REQ;
 				}
 			}
 		}
 	} else if (mpa->revision == 1 && peer2peer)
 		ep->mpa_attr.p2p_type = p2p_type;
 
 	if (set_tcpinfo(ep))
 		goto abort;
 
 	CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
 	    "xmit_marker_enabled = %d, version = %d", __func__,
 	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
 	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
 
 	state_set(&ep->com, MPA_REQ_RCVD);
 	STOP_EP_TIMER(ep);
 
 	/* drive upcall */
 	/*
 	 * The parent's state is checked under the parent's mutex; the
 	 * connection is aborted if the parent is DEAD or the upcall fails.
 	 */
 	mutex_lock(&ep->parent_ep->com.mutex);
 	if (ep->parent_ep->com.state != DEAD) {
 		if(connect_request_upcall(ep)) {
 			abort_connection(ep);
 		}
 	}else
 		abort_connection(ep);
 	mutex_unlock(&ep->parent_ep->com.mutex);
 }
 
 /*
  * Reject the connection request held by cm_id's endpoint.
  *
  * Returns -ECONNRESET if the endpoint is already DEAD, 0 otherwise.
  * c4iw_put_ep() is called on the endpoint on every return path.
  */
 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
 {
 	struct c4iw_ep *ep = to_ep(cm_id);
 	int rc;
 
 	CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep);
 
 	/* Nothing left to reject once the endpoint is DEAD. */
 	if (state_read(&ep->com) == DEAD) {
 
 		CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep);
 		c4iw_put_ep(&ep->com);
 		return -ECONNRESET;
 	}
 
 	set_bit(ULP_REJECT, &ep->com.history);
 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
 
 	if (mpa_rev != 0) {
 
 		/* Send an MPA reject, then shut the socket down. */
 		CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep);
 		rc = send_mpa_reject(ep, pdata, pdata_len);
 		rc = soshutdown(ep->com.so, 3);
 	} else {
 
 		/* mpa_rev 0: no reject message is sent, just abort. */
 		CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep);
 		abort_connection(ep);
 	}
 
 	c4iw_put_ep(&ep->com);
 	CTR2(KTR_IW_CXGBE, "%s:crc4 %p", __func__, ep);
 	return 0;
 }
 
 /*
  * Accept the connection request held by cm_id's endpoint.
  *
  * Validates the requested ird/ord, binds the QP named by conn_param->qpn to
  * the endpoint, asks c4iw_modify_qp() to move the QP to RTS, sends the MPA
  * reply, switches the endpoint to FPDU_MODE and issues the established
  * upcall.
  *
  * Returns 0 on success or a negative errno.  c4iw_put_ep() is called on the
  * endpoint on every return path.
  */
 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 {
 	int err;
 	struct c4iw_qp_attributes attrs;
 	enum c4iw_qp_attr_mask mask;
 	struct c4iw_ep *ep = to_ep(cm_id);
 	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
 	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
 
 	CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep);
 
 	if (state_read(&ep->com) == DEAD) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep);
 		err = -ECONNRESET;
 		goto err;
 	}
 
 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
 	BUG_ON(!qp);
 
 	set_bit(ULP_ACCEPT, &ep->com.history);
 
 	/* Requested read depths must not exceed the module-wide limit. */
 	if ((conn_param->ord > c4iw_max_read_depth) ||
 		(conn_param->ird > c4iw_max_read_depth)) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep);
 		abort_connection(ep);
 		err = -EINVAL;
 		goto err;
 	}
 
 	/*
 	 * MPA v2 enhanced connection: compare our ord/ird against the values
 	 * stored in the endpoint (ep->ird / ep->ord).
 	 */
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep);
 
 		if (conn_param->ord > ep->ird) {
 
 			CTR2(KTR_IW_CXGBE, "%s:cac4 %p", __func__, ep);
 			ep->ird = conn_param->ird;
 			ep->ord = conn_param->ord;
 			send_mpa_reject(ep, conn_param->private_data,
 					conn_param->private_data_len);
 			abort_connection(ep);
 			err = -ENOMEM;
 			goto err;
 		}
 
 		if (conn_param->ird > ep->ord) {
 
 			CTR2(KTR_IW_CXGBE, "%s:cac5 %p", __func__, ep);
 
 			/* ep->ord of 0: fall back to ird 1 instead of failing. */
 			if (!ep->ord) {
 
 				CTR2(KTR_IW_CXGBE, "%s:cac6 %p", __func__, ep);
 				conn_param->ird = 1;
 			}
 			else {
 				CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep);
 				abort_connection(ep);
 				err = -ENOMEM;
 				goto err;
 			}
 		}
 
 	}
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
 
 	if (ep->mpa_attr.version != 2) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cac8 %p", __func__, ep);
 
 		if (peer2peer && ep->ird == 0) {
 
 			CTR2(KTR_IW_CXGBE, "%s:cac9 %p", __func__, ep);
 			ep->ird = 1;
 		}
 	}
 
 
 	/* This reference is released again on the err1 path below. */
 	cm_id->add_ref(cm_id);
 	ep->com.cm_id = cm_id;
 	ep->com.qp = qp;
 	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
 
 	/* bind QP to EP and move to RTS */
 	attrs.mpa_attr = ep->mpa_attr;
 	attrs.max_ird = ep->ird;
 	attrs.max_ord = ep->ord;
 	attrs.llp_stream_handle = ep;
 	attrs.next_state = C4IW_QP_STATE_RTS;
 
 	/* bind QP and TID with INIT_WR */
 	mask = C4IW_QP_ATTR_NEXT_STATE |
 		C4IW_QP_ATTR_LLP_STREAM_HANDLE |
 		C4IW_QP_ATTR_MPA_ATTR |
 		C4IW_QP_ATTR_MAX_IRD |
 		C4IW_QP_ATTR_MAX_ORD;
 
 	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
 
 	if (err) {
 
 		CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
 		goto err1;
 	}
 	err = send_mpa_reply(ep, conn_param->private_data,
 			conn_param->private_data_len);
 
 	if (err) {
 
 		/*
 		 * NOTE(review): this trace reuses the "caca" tag of the
 		 * c4iw_modify_qp() failure above, so the two failures cannot
 		 * be told apart in a KTR dump.
 		 */
 		CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
 		goto err1;
 	}
 
 	state_set(&ep->com, FPDU_MODE);
 	established_upcall(ep);
 	c4iw_put_ep(&ep->com);
 	CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep);
 	return 0;
 err1:
 	/* Undo the cm_id/QP binding made above. */
 	ep->com.cm_id = NULL;
 	ep->com.qp = NULL;
 	cm_id->rem_ref(cm_id);
 err:
 	c4iw_put_ep(&ep->com);
 	CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep);
 	return err;
 }
 
 
 
 /*
  * Start an active connection for cm_id.
  *
  * Allocates an endpoint, stashes the caller's private data and ird/ord in
  * it, checks that the route to the remote address leaves through a
  * TOE-capable interface with a toedev, and then calls soconnect() on
  * cm_id->so.
  *
  * Returns 0 when soconnect() was started successfully.  The locally
  * generated failures return a negative errno.
  * NOTE(review): a soconnect() failure is returned unmodified, which looks
  * like a positive errno -- confirm what the callers expect.
  */
 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 {
 	int err = 0;
 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
 	struct c4iw_ep *ep = NULL;
-	struct rtentry *rt;
+	struct nhop4_extended nh4;
 	struct toedev *tdev;
 
 	CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id);
 
 	if ((conn_param->ord > c4iw_max_read_depth) ||
 		(conn_param->ird > c4iw_max_read_depth)) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id);
 		err = -EINVAL;
 		goto out;
 	}
 	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
 
 	if (!ep) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cc2 %p", __func__, cm_id);
 		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
 		err = -ENOMEM;
 		goto out;
 	}
 	init_timer(&ep->timer);
 	ep->plen = conn_param->private_data_len;
 
 	/* Private data is stored right after the MPA header in mpa_pkt. */
 	if (ep->plen) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep);
 		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
 				conn_param->private_data, ep->plen);
 	}
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
 
 	if (peer2peer && ep->ord == 0) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep);
 		ep->ord = 1;
 	}
 
 	/* Released again on the fail2 path. */
 	cm_id->add_ref(cm_id);
 	ep->com.dev = dev;
 	ep->com.cm_id = cm_id;
 	ep->com.qp = get_qhp(dev, conn_param->qpn);
 
 	if (!ep->com.qp) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep);
 		err = -EINVAL;
 		goto fail2;
 	}
 	ep->com.thread = curthread;
 	ep->com.so = cm_id->so;
 
 	init_sock(&ep->com);
 
 	/* find a route */
-	rt = find_route(
+	err = find_route(
 		cm_id->local_addr.sin_addr.s_addr,
 		cm_id->remote_addr.sin_addr.s_addr,
 		cm_id->local_addr.sin_port,
-		cm_id->remote_addr.sin_port, 0);
+		cm_id->remote_addr.sin_port, 0, &nh4);
 
-	if (!rt) {
+	if (err) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep);
 		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
 		err = -EHOSTUNREACH;
 		goto fail2;
 	}
 
-	if (!(rt->rt_ifp->if_capenable & IFCAP_TOE)) {
+	if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE)) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep);
 		printf("%s - interface not TOE capable.\n", __func__);
 		close_socket(&ep->com, 0);
 		err = -ENOPROTOOPT;
 		goto fail3;
 	}
-	tdev = TOEDEV(rt->rt_ifp);
+	tdev = TOEDEV(nh4.nh_ifp);
 
 	if (tdev == NULL) {
 
 		CTR2(KTR_IW_CXGBE, "%s:cc9 %p", __func__, ep);
 		printf("%s - No toedev for interface.\n", __func__);
 		/*
 		 * err is still 0 here (the route lookup succeeded).  Without
 		 * an explicit error the caller would be told the connect
 		 * succeeded even though the endpoint is released below.
 		 */
 		err = -ENOPROTOOPT;
 		goto fail3;
 	}
-	RTFREE(rt);
+	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
 
 	state_set(&ep->com, CONNECTING);
 	ep->tos = 0;
 	ep->com.local_addr = cm_id->local_addr;
 	ep->com.remote_addr = cm_id->remote_addr;
 	err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
 		ep->com.thread);
 
 	if (!err) {
 		CTR2(KTR_IW_CXGBE, "%s:cca %p", __func__, ep);
 		goto out;
 	} else {
 		close_socket(&ep->com, 0);
 		goto fail2;
 	}
 
 fail3:
 	/* Route lookup succeeded: release the nexthop before unwinding. */
 	CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep);
-	RTFREE(rt);
+	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
 fail2:
 	cm_id->rem_ref(cm_id);
 	c4iw_put_ep(&ep->com);
 out:
 	CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep);
 	return err;
 }
 
 /*
  * iwcm->create_listen.  Returns -errno on failure.
  */
 int
 c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 {
 	int rc;
 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
 	struct c4iw_listen_ep *ep;
 	struct socket *so = cm_id->so;
 
 	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
 	CTR5(KTR_IW_CXGBE, "%s: cm_id %p, lso %p, ep %p, inp %p", __func__,
 	    cm_id, so, ep, so->so_pcb);
 	if (ep == NULL) {
 		log(LOG_ERR, "%s: failed to alloc memory for endpoint\n",
 		    __func__);
 		rc = ENOMEM;
 		goto failed;
 	}
 
 	cm_id->add_ref(cm_id);
 	ep->com.cm_id = cm_id;
 	ep->com.dev = dev;
 	ep->backlog = backlog;
 	ep->com.local_addr = cm_id->local_addr;
 	ep->com.thread = curthread;
 	state_set(&ep->com, LISTEN);
 	ep->com.so = so;
 	init_sock(&ep->com);
 
 	rc = solisten(so, ep->backlog, ep->com.thread);
 	if (rc != 0) {
 		log(LOG_ERR, "%s: failed to start listener: %d\n", __func__,
 		    rc);
 		close_socket(&ep->com, 0);
 		cm_id->rem_ref(cm_id);
 		c4iw_put_ep(&ep->com);
 		goto failed;
 	}
 
 	cm_id->provider_data = ep;
 	return (0);
 
 failed:
 	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, FAILED (%d)", __func__, cm_id, rc);
 	return (-rc);
 }
 
 int
 c4iw_destroy_listen(struct iw_cm_id *cm_id)
 {
 	int rc;
 	struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
 
 	CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, inp %p", __func__, cm_id,
 	    cm_id->so, cm_id->so->so_pcb);
 
 	state_set(&ep->com, DEAD);
 	rc = close_socket(&ep->com, 0);
 	cm_id->rem_ref(cm_id);
 	c4iw_put_ep(&ep->com);
 
 	return (rc);
 }
 
 /*
  * Begin disconnecting an endpoint.
  *
  * abrupt != 0 aborts the connection (abort_connection()); otherwise the
  * socket is shut down (shutdown_socket()).  The state transition is decided
  * under ep->com.mutex; the abort/shutdown itself is issued after the mutex
  * has been dropped.  The gfp argument is not used in this function body.
  *
  * Returns 0, or the non-zero value returned by abort_connection() /
  * shutdown_socket(), in which case the endpoint resources are released.
  */
 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
 {
 	int ret = 0;
 	int close = 0;
 	int fatal = 0;
 	struct c4iw_rdev *rdev;
 
 	mutex_lock(&ep->com.mutex);
 
 	CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep);
 
 	rdev = &ep->com.dev->rdev;
 
 	/*
 	 * On a fatal device error the endpoint is forced to DEAD, so the
 	 * switch below takes the "ignoring disconnect" branch and the
 	 * resources are released at the end of the function.
 	 */
 	if (c4iw_fatal_error(rdev)) {
 
 		CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep);
 		fatal = 1;
 		close_complete_upcall(ep, -ECONNRESET);
 		ep->com.state = DEAD;
 	}
 	CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep,
 	    states[ep->com.state]);
 
 	switch (ep->com.state) {
 
 		case MPA_REQ_WAIT:
 		case MPA_REQ_SENT:
 		case MPA_REQ_RCVD:
 		case MPA_REP_SENT:
 		case FPDU_MODE:
 			/* First close request for this connection. */
 			close = 1;
 			if (abrupt)
 				ep->com.state = ABORTING;
 			else {
 				ep->com.state = CLOSING;
 				START_EP_TIMER(ep);
 			}
 			set_bit(CLOSE_SENT, &ep->com.flags);
 			break;
 
 		case CLOSING:
 
 			/* Only act if CLOSE_SENT was not already set. */
 			if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
 
 				close = 1;
 				if (abrupt) {
 					STOP_EP_TIMER(ep);
 					ep->com.state = ABORTING;
 				} else
 					ep->com.state = MORIBUND;
 			}
 			break;
 
 		case MORIBUND:
 		case ABORTING:
 		case DEAD:
 			CTR3(KTR_IW_CXGBE,
 			    "%s ignoring disconnect ep %p state %u", __func__,
 			    ep, ep->com.state);
 			break;
 
 		default:
 			BUG();
 			break;
 	}
 
 	mutex_unlock(&ep->com.mutex);
 
 	if (close) {
 
 		CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep);
 
 		if (abrupt) {
 
 			CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep);
 			set_bit(EP_DISC_ABORT, &ep->com.history);
 			ret = abort_connection(ep);
 		} else {
 
 			CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep);
 			set_bit(EP_DISC_CLOSE, &ep->com.history);
 
 			/* Endpoints without a parent go straight to MORIBUND. */
 			if (!ep->parent_ep)
 				__state_set(&ep->com, MORIBUND);
 			ret = shutdown_socket(&ep->com);
 		}
 
 		/* A failed abort/shutdown is treated like a fatal error. */
 		if (ret) {
 
 			fatal = 1;
 		}
 	}
 
 	if (fatal) {
 
 		release_ep_resources(ep);
 		CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep);
 	return ret;
 }
 
 #ifdef C4IW_EP_REDIRECT
 /*
  * Switch an endpoint from dst entry 'old' to 'new' with L2T entry 'l2t'.
  * Returns 1 when the endpoint was using 'old' and has been updated, 0 when
  * the endpoint uses a different dst and nothing was changed.
  */
 int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
 		struct l2t_entry *l2t)
 {
 	struct c4iw_ep *ep = ctx;
 
 	if (ep->dst == old) {
 		PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
 				l2t);
 
 		/* Take the new dst, drop the old L2T entry and dst. */
 		dst_hold(new);
 		cxgb4_l2t_release(ep->l2t);
 		ep->l2t = l2t;
 		dst_release(old);
 		ep->dst = new;
 		return 1;
 	}
 
 	return 0;
 }
 #endif
 
 
 
 /*
  * Endpoint timer callback; arg is the struct c4iw_ep pointer.
  *
  * Puts the endpoint on timeout_list (once: guarded by the TIMEOUT flag bit)
  * and, if it was newly added, queues c4iw_task on c4iw_taskq.
  */
 static void ep_timeout(unsigned long arg)
 {
 	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
 	int newly_queued;
 
 	CTR2(KTR_IW_CXGBE, "%s:etB %p", __func__, ep);
 
 	spin_lock(&timeout_lock);
 	newly_queued = !test_and_set_bit(TIMEOUT, &ep->com.flags);
 	if (newly_queued)
 		list_add_tail(&ep->entry, &timeout_list);
 	spin_unlock(&timeout_lock);
 
 	/* Queue the work only after timeout_lock has been dropped. */
 	if (newly_queued) {
 
 		CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
 		queue_work(c4iw_taskq, &c4iw_task);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:etE %p", __func__, ep);
 }
 
 static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
 {
 	uint64_t val = be64toh(*rpl);
 	int ret;
 	struct c4iw_wr_wait *wr_waitp;
 
 	ret = (int)((val >> 8) & 0xff);
 	wr_waitp = (struct c4iw_wr_wait *)rpl[1];
 	CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret);
 	if (wr_waitp)
 		c4iw_wake_up(wr_waitp, ret ? -ret : 0);
 
 	return (0);
 }
 
 /*
  * Handler for FW6_TYPE_CQE firmware messages: copies the CQE out of the
  * reply and passes the copy to c4iw_ev_dispatch().  Always returns 0.
  */
 static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
 {
 	const struct t4_cqe *src = (const struct t4_cqe *)(&rpl[0]);
 	struct t4_cqe cqe = *src;
 
 	CTR2(KTR_IW_CXGBE, "%s rpl %p", __func__, rpl);
 	c4iw_ev_dispatch(sc->iwarp_softc, &cqe);
 
 	return (0);
 }
 
 /*
  * CPL_RDMA_TERMINATE handler.
  *
  * Looks up the endpoint through the tid's toepcb -> inpcb -> socket ->
  * so_rcv.sb_upcallarg and, when the endpoint has a QP, asks
  * c4iw_modify_qp() to move that QP to C4IW_QP_STATE_TERMINATE.
  * Always returns 0.
  */
 static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 
 	struct adapter *sc = iq->adapter;
 
 	const struct cpl_rdma_terminate *rpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(rpl);
 	struct c4iw_qp_attributes attrs;
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct socket *so = inp_inpcbtosocket(toep->inp);
 	struct c4iw_ep *ep = so->so_rcv.sb_upcallarg;
 
 	/*
 	 * The format has three conversions, so use CTR3 and pass the tid for
 	 * the trailing %d (it used to be CTR2 with no matching argument).
 	 */
 	CTR3(KTR_IW_CXGBE, "%s:tB %p %d", __func__, ep, tid);
 
 	if (ep && ep->com.qp) {
 
 		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
 				ep->com.qp->wq.sq.qid);
 		/* Only next_state is read: the mask selects nothing else. */
 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
 		c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs,
 				1);
 	} else
 		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
 	CTR3(KTR_IW_CXGBE, "%s:tE %p %d", __func__, ep, tid);
 
 	return 0;
 }
 
 	void
 c4iw_cm_init_cpl(struct adapter *sc)
 {
 
 	t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
 	t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, fw6_wr_rpl);
 	t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, fw6_cqe_handler);
 	t4_register_an_handler(sc, c4iw_ev_handler);
 }
 
 	void
 c4iw_cm_term_cpl(struct adapter *sc)
 {
 
 	t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
 	t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, NULL);
 	t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, NULL);
 }
 
 /*
  * One-time connection-manager setup: initialise the request and timeout
  * lists with their locks, set up c4iw_task to run process_req, and create
  * the "iw_cxgbe" single-threaded workqueue.
  * Returns 0 on success, -ENOMEM if the workqueue could not be created.
  */
 int __init
 c4iw_cm_init(void)
 {
 	TAILQ_INIT(&req_list);
 	spin_lock_init(&req_lock);
 
 	INIT_LIST_HEAD(&timeout_list);
 	spin_lock_init(&timeout_lock);
 
 	INIT_WORK(&c4iw_task, process_req);
 
 	c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
 
 	return ((c4iw_taskq != NULL) ? 0 : -ENOMEM);
 }
 
 /*
  * Connection-manager teardown: warn if either work list is still populated,
  * then flush and destroy the workqueue.
  */
 void __exit
 c4iw_cm_term(void)
 {
 	/* Both lists are expected to be empty by now. */
 	WARN_ON(!TAILQ_EMPTY(&req_list));
 	WARN_ON(!list_empty(&timeout_list));
 
 	flush_workqueue(c4iw_taskq);
 	destroy_workqueue(c4iw_taskq);
 }
 #endif
Index: projects/release-pkg/sys/dev/cxgbe/tom/t4_cpl_io.c
===================================================================
--- projects/release-pkg/sys/dev/cxgbe/tom/t4_cpl_io.c	(revision 293335)
+++ projects/release-pkg/sys/dev/cxgbe/tom/t4_cpl_io.c	(revision 293336)
@@ -1,1750 +1,1749 @@
 /*-
  * Copyright (c) 2012, 2015 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 
 #ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/module.h>
 #include <sys/protosw.h>
 #include <sys/domain.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sglist.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp_var.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/toecore.h>
 
 #include "common/common.h"
 #include "common/t4_msg.h"
 #include "common/t4_regs.h"
 #include "common/t4_tcb.h"
 #include "tom/t4_tom_l2t.h"
 #include "tom/t4_tom.h"
 
 VNET_DECLARE(int, tcp_do_autosndbuf);
 #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)
 VNET_DECLARE(int, tcp_autosndbuf_inc);
 #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc)
 VNET_DECLARE(int, tcp_autosndbuf_max);
 #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max)
 VNET_DECLARE(int, tcp_do_autorcvbuf);
 #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
 VNET_DECLARE(int, tcp_autorcvbuf_inc);
 #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
 VNET_DECLARE(int, tcp_autorcvbuf_max);
 #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)
 
 /*
  * Build and send the FW_FLOWC_WR for a tid.
  *
  * With ftxp == NULL six parameters are sent (PFNVFN, CH, PORT, IQID, and
  * fixed 512 values for SNDBUF and MSS); with ftxp != NULL eight are sent
  * (SNDNXT and RCVNXT are added and SNDBUF/MSS come from ftxp).  The tx
  * credits used by the work request are charged to the toepcb and
  * TPF_FLOWC_WR_SENT is set.  Panics if the work request cannot be
  * allocated.
  */
 void
 send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp)
 {
 	struct wrqe *wr;
 	struct fw_flowc_wr *flowc;
 	unsigned int nparams = ftxp ? 8 : 6, flowclen;
 	struct vi_info *vi = toep->vi;
 	struct port_info *pi = vi->pi;
 	struct adapter *sc = pi->adapter;
 	unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;
 	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 
 	KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
 	    ("%s: flowc for tid %u sent already", __func__, toep->tid));
 
 	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
 
 	/* The allocation is rounded up to a multiple of 16 bytes. */
 	wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	flowc = wrtod(wr);
 	memset(flowc, 0, wr->wr_len);
 
 	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
 	    V_FW_FLOWC_WR_NPARAMS(nparams));
 	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
 	    V_FW_WR_FLOWID(toep->tid));
 
 	/* Parameters common to both variants. */
 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 	flowc->mnemval[0].val = htobe32(pfvf);
 	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
 	flowc->mnemval[1].val = htobe32(pi->tx_chan);
 	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
 	flowc->mnemval[2].val = htobe32(pi->tx_chan);
 	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
 	flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id);
 	if (ftxp) {
 		/* Send buffer is capped at the adapter's sc->tt.sndbuf. */
 		uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf);
 
 		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
 		flowc->mnemval[4].val = htobe32(ftxp->snd_nxt);
 		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
 		flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt);
 		flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
 		flowc->mnemval[6].val = htobe32(sndbuf);
 		flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
 		flowc->mnemval[7].val = htobe32(ftxp->mss);
 
 		CTR6(KTR_CXGBE,
 		    "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x",
 		    __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt,
 		    ftxp->rcv_nxt);
 	} else {
 		flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
 		flowc->mnemval[4].val = htobe32(512);
 		flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
 		flowc->mnemval[5].val = htobe32(512);
 
 		CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid);
 	}
 
 	/* Record the credits in the tx descriptor and advance the ring. */
 	txsd->tx_credits = howmany(flowclen, 16);
 	txsd->plen = 0;
 	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
 	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
 	toep->tx_credits -= txsd->tx_credits;
 	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
 		toep->txsd_pidx = 0;
 	toep->txsd_avail--;
 
 	toep->flags |= TPF_FLOWC_WR_SENT;
         t4_wrq_tx(sc, wr);
 }
 
 /*
  * Send a CPL_ABORT_REQ (CPL_ABORT_SEND_RST) for the connection.
  *
  * Does nothing if TPF_ABORT_SHUTDOWN is already set.  The snd_nxt argument
  * is used only when the inp has been dropped; otherwise tp->snd_nxt is
  * used.  Panics if the work request cannot be allocated.
  */
 void
 send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt)
 {
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);	/* don't use if INP_DROPPED */
 	struct cpl_abort_req *req;
 	struct wrqe *wr;
 	int tid = toep->tid;
 
 	INP_WLOCK_ASSERT(inp);
 
 	CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s",
 	    __func__, toep->tid,
 	    inp->inp_flags & INP_DROPPED ? "inp dropped" :
 	    tcpstates[tp->t_state],
 	    toep->flags, inp->inp_flags,
 	    toep->flags & TPF_ABORT_SHUTDOWN ?
 	    " (abort already in progress)" : "");
 
 	/* abort already in progress */
 	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		return;
 	toep->flags |= TPF_ABORT_SHUTDOWN;
 
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %d.", __func__, tid));
 
 	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	req = wrtod(wr);
 
 	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid);
 	req->rsvd0 = htobe32((inp->inp_flags & INP_DROPPED) ?
 	    snd_nxt : tp->snd_nxt);
 	req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT);
 	req->cmd = CPL_ABORT_SEND_RST;
 
 	/*
 	 * XXX: What's the correct way to tell that the inp hasn't been detached
 	 * from its socket?  Should I even be flushing the snd buffer here?
 	 * The NULL check on the socket is there because of that uncertainty.
 	 */
 	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0 &&
 	    inp->inp_socket != NULL)
 		sbflush(&inp->inp_socket->so_snd);
 
 	t4_l2t_send(sc, wr, toep->l2te);
 }
 
 /*
  * Called when a connection is established to translate the TCP options
  * reported by HW to FreeBSD's native format.
  *
  * Sets t_maxseg from the adapter's MTU table entry selected by the MSS
  * option index (minus the IP/IPv6 + TCP header size), and updates the
  * timestamp, SACK and window-scale flags in the tcpcb.
  */
 static void
 assign_rxopt(struct tcpcb *tp, unsigned int opt)
 {
 	struct toepcb *toep = tp->t_toe;
 	struct inpcb *inp = tp->t_inpcb;
 	struct adapter *sc = td_adapter(toep->td);
 	int n;
 
 	INP_LOCK_ASSERT(inp);
 
 	/* n = fixed header overhead to subtract from the MTU. */
 	if (inp->inp_inc.inc_flags & INC_ISIPV6)
 		n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 	else
 		n = sizeof(struct ip) + sizeof(struct tcphdr);
-	tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - n;
+	tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n;
 
 	CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid,
 	    G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]);
 
 	if (G_TCPOPT_TSTAMP(opt)) {
 		tp->t_flags |= TF_RCVD_TSTMP;	/* timestamps ok */
 		tp->ts_recent = 0;		/* hmmm */
 		tp->ts_recent_age = tcp_ts_getticks();
-		tp->t_maxseg -= TCPOLEN_TSTAMP_APPA;
 	}
 
 	if (G_TCPOPT_SACK(opt))
 		tp->t_flags |= TF_SACK_PERMIT;	/* should already be set */
 	else
 		tp->t_flags &= ~TF_SACK_PERMIT;	/* sack disallowed by peer */
 
 	if (G_TCPOPT_WSCALE_OK(opt))
 		tp->t_flags |= TF_RCVD_SCALE;
 
 	/* Doing window scaling? */
 	if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) ==
 	    (TF_RCVD_SCALE | TF_REQ_SCALE)) {
 		tp->rcv_scale = tp->request_r_scale;
 		tp->snd_scale = G_TCPOPT_SND_WSCALE(opt);
 	}
 }
 
 /*
  * Completes some final bits of initialization for just established connections
  * and changes their state to TCPS_ESTABLISHED.
  *
  * The ISNs are from after the exchange of SYNs.  i.e., the true ISN + 1.
  *
  * snd_isn, rcv_isn and opt are converted from big-endian here.  The function
  * also sends the flowc work request for the tid and finishes with
  * soisconnected() on the socket.
  */
 void
 make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
     uint16_t opt)
 {
 	struct inpcb *inp = toep->inp;
 	struct socket *so = inp->inp_socket;
 	struct tcpcb *tp = intotcpcb(inp);
 	long bufsize;
 	uint32_t iss = be32toh(snd_isn) - 1;	/* true ISS */
 	uint32_t irs = be32toh(rcv_isn) - 1;	/* true IRS */
 	uint16_t tcpopt = be16toh(opt);
 	struct flowc_tx_params ftxp;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(tp->t_state == TCPS_SYN_SENT ||
 	    tp->t_state == TCPS_SYN_RECEIVED,
 	    ("%s: TCP state %s", __func__, tcpstates[tp->t_state]));
 
 	CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p",
 	    __func__, toep->tid, toep, inp);
 
 	tp->t_state = TCPS_ESTABLISHED;
 	tp->t_starttime = ticks;
 	TCPSTAT_INC(tcps_connects);
 
 	/* Receive side: the initial window is rx_credits in units of 1KB. */
 	tp->irs = irs;
 	tcp_rcvseqinit(tp);
 	tp->rcv_wnd = toep->rx_credits << 10;
 	tp->rcv_adv += tp->rcv_wnd;
 	tp->last_ack_sent = tp->rcv_nxt;
 
 	/*
 	 * If we were unable to send all rx credits via opt0, save the remainder
 	 * in rx_credits so that they can be handed over with the next credit
 	 * update.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	bufsize = select_rcv_wnd(so);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	toep->rx_credits = bufsize - tp->rcv_wnd;
 
 	/* Send side: all sequence numbers start right after the SYN. */
 	tp->iss = iss;
 	tcp_sendseqinit(tp);
 	tp->snd_una = iss + 1;
 	tp->snd_nxt = iss + 1;
 	tp->snd_max = iss + 1;
 
 	assign_rxopt(tp, tcpopt);
 
 	/* Send-buffer size to report in the flowc work request. */
 	SOCKBUF_LOCK(&so->so_snd);
 	if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf)
 		bufsize = V_tcp_autosndbuf_max;
 	else
 		bufsize = sbspace(&so->so_snd);
 	SOCKBUF_UNLOCK(&so->so_snd);
 
 	ftxp.snd_nxt = tp->snd_nxt;
 	ftxp.rcv_nxt = tp->rcv_nxt;
 	ftxp.snd_space = bufsize;
 	ftxp.mss = tp->t_maxseg;
 	send_flowc_wr(toep, &ftxp);
 
 	soisconnected(so);
 }
 
 /*
  * Send a CPL_RX_DATA_ACK carrying 'credits' rx credits for the tid.
  * Returns the number of credits sent: 'credits' on success, 0 if the work
  * request could not be allocated.
  */
 static int
 send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
 {
 	const uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
 	struct cpl_rx_data_ack *req;
 	struct wrqe *wr;
 
 	KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));
 
 	wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
 	if (wr != NULL) {
 		req = wrtod(wr);
 
 		INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid);
 		req->credit_dack = htobe32(dack | V_RX_CREDITS(credits));
 
 		t4_wrq_tx(sc, wr);
 		return (credits);
 	}
 
 	/* Nothing was sent; the caller keeps its credits. */
 	return (0);
 }
 
 /*
  * Account for data drained from the socket's receive buffer since the last
  * call and, when one of the thresholds below is met, send the accumulated
  * rx credits with send_rx_credits() and grow rcv_wnd/rcv_adv by the
  * amount actually sent.
  */
 void
 t4_rcvd(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 	struct sockbuf *sb = &so->so_rcv;
 	struct toepcb *toep = tp->t_toe;
 	int credits, update;
 
 	INP_WLOCK_ASSERT(inp);
 
 	SOCKBUF_LOCK(sb);
 	KASSERT(toep->sb_cc >= sbused(sb),
 	    ("%s: sb %p has more data (%d) than last time (%d).",
 	    __func__, sb, sbused(sb), toep->sb_cc));
 
 	/* Bytes that left the buffer since last time become credits. */
 	toep->rx_credits += toep->sb_cc - sbused(sb);
 	toep->sb_cc = sbused(sb);
 
 	update = 0;
 	if (toep->rx_credits > 0) {
 		if (tp->rcv_wnd <= 32 * 1024 ||
 		    toep->rx_credits >= 64 * 1024)
 			update = 1;
 		else if (toep->rx_credits >= 16 * 1024 &&
 		    tp->rcv_wnd <= 128 * 1024)
 			update = 1;
 		else if (toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)
 			update = 1;
 	}
 
 	if (update) {
 
 		credits = send_rx_credits(sc, toep, toep->rx_credits);
 		toep->rx_credits -= credits;
 		tp->rcv_wnd += credits;
 		tp->rcv_adv += credits;
 	}
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Close a connection by sending a CPL_CLOSE_CON_REQ message.
  *
  * A no-op if TPF_FIN_SENT is already set.  Otherwise sets TPF_FIN_SENT and
  * clears TPF_SEND_FIN before sending.  Always returns 0; panics if the work
  * request cannot be allocated.
  */
 static int
 close_conn(struct adapter *sc, struct toepcb *toep)
 {
 	unsigned int tid = toep->tid;
 	struct cpl_close_con_req *req;
 	struct wrqe *wr;
 
 	CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid,
 	    toep->flags & TPF_FIN_SENT ? ", IGNORED" : "");
 
 	if (toep->flags & TPF_FIN_SENT)
 		return (0);
 
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, tid));
 
 	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	req = wrtod(wr);
 
 	/* Work request header followed by the CPL itself. */
 	req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) |
 	    V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr)));
 	req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) |
 	    V_FW_WR_FLOWID(tid));
 	req->wr.wr_lo = cpu_to_be64(0);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
 	req->rsvd = 0;
 
 	toep->flags &= ~TPF_SEND_FIN;
 	toep->flags |= TPF_FIN_SENT;
 	t4_l2t_send(sc, wr, toep->l2te);
 
 	return (0);
 }
 
 #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
 #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16))
 
 /* Maximum amount of immediate data we could stuff in a WR */
 /*
  * Returns the number of immediate-data bytes that fit in a tx data work
  * request given tx_credits (16 bytes each), using at most two EQ
  * descriptors.  Returns 0 when there are fewer than MIN_OFLD_TX_CREDITS.
  */
 static inline int
 max_imm_payload(int tx_credits)
 {
 	const int n = 2;	/* Use only up to 2 desc for imm. data WR */
 	int wr_bytes;
 
 	KASSERT(tx_credits >= 0 &&
 		tx_credits <= MAX_OFLD_TX_CREDITS,
 		("%s: %d credits", __func__, tx_credits));
 
 	if (tx_credits < MIN_OFLD_TX_CREDITS)
 		return (0);
 
 	/* Space available to the WR, capped at n descriptors. */
 	wr_bytes = tx_credits * 16;
 	if (tx_credits >= (n * EQ_ESIZE) / 16)
 		wr_bytes = n * EQ_ESIZE;
 
 	return (wr_bytes - sizeof(struct fw_ofld_tx_data_wr));
 }
 
 /* Maximum number of SGL entries we could stuff in a WR */
 /*
  * Returns the number of SGL entries that fit in a tx data work request
  * given tx_credits: one entry in the ulptx_sgl itself, two per complete
  * 24-byte sge pair in the credits beyond MIN_OFLD_TX_CREDITS, and one more
  * when exactly 16 bytes are left over.  Returns 0 when there are fewer
  * than MIN_OFLD_TX_CREDITS.
  */
 static inline int
 max_dsgl_nsegs(int tx_credits)
 {
 	int sge_pair_credits, pair_bytes, nseg;
 
 	KASSERT(tx_credits >= 0 &&
 		tx_credits <= MAX_OFLD_TX_CREDITS,
 		("%s: %d credits", __func__, tx_credits));
 
 	if (tx_credits < MIN_OFLD_TX_CREDITS)
 		return (0);
 
 	sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS;
 	pair_bytes = sge_pair_credits * 16;
 
 	/* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */
 	nseg = 1 + 2 * (pair_bytes / 24);
 	if (pair_bytes % 24 == 16)
 		nseg++;
 
 	return (nseg);
 }
 
 /*
  * Fill in the fw_ofld_tx_data_wr header at dst.
  *
  * When txalign > 0 the LSO/alignment flags are added: LSO is disabled for
  * payloads shorter than two segments or on 10G ports; otherwise payload
  * alignment is requested, with ALIGNPLDSHOVE unless TF_NODELAY is set.
  */
 static inline void
 write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen,
     unsigned int plen, uint8_t credits, int shove, int ulp_submode, int txalign)
 {
 	struct fw_ofld_tx_data_wr *txwr = dst;
 	uint32_t flags;
 
 	/* Collect all flag bits in host order, convert once at the end. */
 	flags = V_TX_ULP_MODE(toep->ulp_mode) |
 	    V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove);
 
 	if (txalign > 0) {
 		struct tcpcb *tp = intotcpcb(toep->inp);
 
 		if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi)) {
 			flags |= F_FW_OFLD_TX_DATA_WR_LSODISABLE;
 		} else {
 			flags |= F_FW_OFLD_TX_DATA_WR_ALIGNPLD;
 			if (!(tp->t_flags & TF_NODELAY))
 				flags |= F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE;
 		}
 	}
 
 	txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) |
 	    V_FW_WR_IMMDLEN(immdlen));
 	txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) |
 	    V_FW_WR_LEN16(credits));
 	txwr->lsodisable_to_flags = htobe32(flags);
 	txwr->plen = htobe32(plen);
 }
 
 /*
  * Generate a DSGL from a starting mbuf.  The total number of segments and the
  * maximum segments in any one mbuf are provided.
  *
  * dst:   where the ulptx_sgl is written.
  * start: first mbuf to include; stop: mbuf at which to stop (exclusive).
  * nsegs: total number of segments, written into the SGL header.
  * n:     size of the on-stack scatter/gather scratch array.
  */
 static void
 write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n)
 {
 	struct mbuf *m;
 	struct ulptx_sgl *usgl = dst;
 	int i, j, rc;
 	struct sglist sg;
 	struct sglist_seg segs[n];
 
 	KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));
 
 	sglist_init(&sg, n, segs);
 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
 	    V_ULPTX_NSGE(nsegs));
 
 	/*
 	 * i indexes the sge[] pairs; -1 means "the next segment goes into
 	 * len0/addr0", i.e. the slot embedded in the ulptx_sgl header.
 	 */
 	i = -1;
 	for (m = start; m != stop; m = m->m_next) {
 		rc = sglist_append(&sg, mtod(m, void *), m->m_len);
 		if (__predict_false(rc != 0))
 			panic("%s: sglist_append %d", __func__, rc);
 
 		for (j = 0; j < sg.sg_nseg; i++, j++) {
 			if (i < 0) {
 				usgl->len0 = htobe32(segs[j].ss_len);
 				usgl->addr0 = htobe64(segs[j].ss_paddr);
 			} else {
 				usgl->sge[i / 2].len[i & 1] =
 				    htobe32(segs[j].ss_len);
 				usgl->sge[i / 2].addr[i & 1] =
 				    htobe64(segs[j].ss_paddr);
 			}
 #ifdef INVARIANTS
 			/* Counted down only so the KASSERT below can check it. */
 			nsegs--;
 #endif
 		}
 		/* The scratch list is reused for every mbuf. */
 		sglist_reset(&sg);
 	}
 	/* Odd i: the last pair is half-filled, so zero its second length. */
 	if (i & 1)
 		usgl->sge[i / 2].len[1] = htobe32(0);
 	KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p",
 	    __func__, nsegs, start, stop));
 }
 
 /*
  * Max number of SGL entries an offload tx work request can have.  This is 41
  * (1 + 40) for a full 512B work request.
  * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40)
  */
 #define OFLD_SGL_LEN (41)
 
 /*
  * Send data and/or a FIN to the peer.
  *
  * The socket's so_snd buffer consists of a stream of data starting with sb_mb
  * and linked together with m_next.  sb_sndptr, if set, is the last mbuf that
  * was transmitted.
  *
  * drop indicates the number of bytes that should be dropped from the head of
  * the send buffer.  It is an optimization that lets do_fw4_ack avoid creating
  * contention on the send buffer lock (before this change it used to do
  * sowwakeup and then t4_push_frames right after that when recovering from tx
  * stalls).  When drop is set this function MUST drop the bytes and wake up any
  * writers.
  *
  * The caller must hold the inp write lock (asserted below).  The function
  * returns without doing anything if the connection is being aborted or if
  * tx is suspended.  It sets TPF_TX_SUSPENDED itself when it runs short of
  * credits or cannot allocate a work request.
  */
 void
 t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop)
 {
 	struct mbuf *sndptr, *m, *sb_sndptr;
 	struct fw_ofld_tx_data_wr *txwr;
 	struct wrqe *wr;
 	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	struct socket *so = inp->inp_socket;
 	struct sockbuf *sb = &so->so_snd;
 	int tx_credits, shove, compl, space, sowwakeup;
 	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
 
 	KASSERT(toep->ulp_mode == ULP_MODE_NONE ||
 	    toep->ulp_mode == ULP_MODE_TCPDDP ||
 	    toep->ulp_mode == ULP_MODE_RDMA,
 	    ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));
 
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
 		return;
 
 	/*
 	 * This function doesn't resume by itself.  Someone else must clear the
 	 * flag and call this function.
 	 */
 	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
 		KASSERT(drop == 0,
 		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
 		return;
 	}
 
 	/* Each iteration builds and sends at most one work request. */
 	do {
 		/* Per-WR limits derived from the credits currently available. */
 		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
 		max_imm = max_imm_payload(tx_credits);
 		max_nsegs = max_dsgl_nsegs(tx_credits);
 
 		SOCKBUF_LOCK(sb);
 		/* Writers are woken up if anything is dropped (first pass only). */
 		sowwakeup = drop;
 		if (drop) {
 			sbdrop_locked(sb, drop);
 			drop = 0;
 		}
 		/* Start after the last mbuf sent, or at the head of the buffer. */
 		sb_sndptr = sb->sb_sndptr;
 		sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb;
 		plen = 0;
 		nsegs = 0;
 		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
 		for (m = sndptr; m != NULL; m = m->m_next) {
 			int n = sglist_count(mtod(m, void *), m->m_len);
 
 			nsegs += n;
 			plen += m->m_len;
 
 			/* This mbuf sent us _over_ the nsegs limit, back out */
 			if (plen > max_imm && nsegs > max_nsegs) {
 				nsegs -= n;
 				plen -= m->m_len;
 				if (plen == 0) {
 					/* Too few credits */
 					toep->flags |= TPF_TX_SUSPENDED;
 					if (sowwakeup)
 						sowwakeup_locked(so);
 					else
 						SOCKBUF_UNLOCK(sb);
 					SOCKBUF_UNLOCK_ASSERT(sb);
 					return;
 				}
 				break;
 			}
 
 			if (max_nsegs_1mbuf < n)
 				max_nsegs_1mbuf = n;
 			sb_sndptr = m;	/* new sb->sb_sndptr if all goes well */
 
 			/* This mbuf put us right at the max_nsegs limit */
 			if (plen > max_imm && nsegs == max_nsegs) {
 				m = m->m_next;
 				break;
 			}
 		}
 
 		space = sbspace(sb);
 
 		/*
 		 * Ask for a completion when send buffer space is getting low
 		 * and enough payload has gone out without one.
 		 */
 		if (space <= sb->sb_hiwat * 3 / 8 &&
 		    toep->plen_nocompl + plen >= sb->sb_hiwat / 4)
 			compl = 1;
 		else
 			compl = 0;
 
 		/* Send buffer autosizing; stops trying once a reserve fails. */
 		if (sb->sb_flags & SB_AUTOSIZE &&
 		    V_tcp_do_autosndbuf &&
 		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
 		    space < sb->sb_hiwat / 8) {
 			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
 			    V_tcp_autosndbuf_max);
 
 			if (!sbreserve_locked(sb, newsize, so, NULL))
 				sb->sb_flags &= ~SB_AUTOSIZE;
 			else
 				sowwakeup = 1;	/* room available */
 		}
 		/* Either branch leaves the sockbuf unlocked (asserted below). */
 		if (sowwakeup)
 			sowwakeup_locked(so);
 		else
 			SOCKBUF_UNLOCK(sb);
 		SOCKBUF_UNLOCK_ASSERT(sb);
 
 		/* nothing to send */
 		if (plen == 0) {
 			KASSERT(m == NULL,
 			    ("%s: nothing to send, but m != NULL", __func__));
 			break;
 		}
 
 		if (__predict_false(toep->flags & TPF_FIN_SENT))
 			panic("%s: excess tx.", __func__);
 
 		/*
 		 * Shove only if this WR takes everything that is queued and the
 		 * stack has not indicated that more data is on its way.
 		 */
 		shove = m == NULL && !(tp->t_flags & TF_MORETOCOME);
 		if (plen <= max_imm) {
 
 			/* Immediate data tx */
 
 			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
 					toep->ofld_txq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr->wr_len, 16);
 			write_tx_wr(txwr, toep, plen, plen, credits, shove, 0,
 			    sc->tt.tx_align);
 			/* The payload is copied in right after the WR header. */
 			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
 			nsegs = 0;
 		} else {
 			int wr_len;
 
 			/* DSGL tx */
 
 			/*
 			 * Header + ulptx_sgl (which holds the first segment) +
 			 * the pairs needed for the remaining nsegs - 1.
 			 */
 			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
 			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
 			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr_len, 16);
 			write_tx_wr(txwr, toep, 0, plen, credits, shove, 0,
 			    sc->tt.tx_align);
 			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
 			    max_nsegs_1mbuf);
 			/* Zero the 8 bytes of padding added by roundup2(). */
 			if (wr_len & 0xf) {
 				uint64_t *pad = (uint64_t *)
 				    ((uintptr_t)txwr + wr_len);
 				*pad = 0;
 			}
 		}
 
 		KASSERT(toep->tx_credits >= credits,
 			("%s: not enough credits", __func__));
 
 		toep->tx_credits -= credits;
 		toep->tx_nocompl += credits;
 		toep->plen_nocompl += plen;
 		/* Also ask for a completion when tx credits are running low. */
 		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
 		    toep->tx_nocompl >= toep->tx_total / 4)
 			compl = 1;
 
 		/* RDMA connections request a completion on every WR. */
 		if (compl || toep->ulp_mode == ULP_MODE_RDMA) {
 			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
 			toep->tx_nocompl = 0;
 			toep->plen_nocompl = 0;
 		}
 
 		tp->snd_nxt += plen;
 		tp->snd_max += plen;
 
 		/* Remember the last mbuf that went out in this WR. */
 		SOCKBUF_LOCK(sb);
 		KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__));
 		sb->sb_sndptr = sb_sndptr;
 		SOCKBUF_UNLOCK(sb);
 
 		toep->flags |= TPF_TX_DATA_SENT;
 		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		/*
 		 * Record this WR's payload length and credits in the next tx
 		 * descriptor (a ring indexed by txsd_pidx); do_fw4_ack reads
 		 * these back when the credits are returned.
 		 */
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
 		txsd->plen = plen;
 		txsd->tx_credits = credits;
 		txsd++;
 		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
 			toep->txsd_pidx = 0;
 			txsd = &toep->txsd[0];
 		}
 		toep->txsd_avail--;
 
 		t4_l2t_send(sc, wr, toep->l2te);
 	} while (m != NULL);
 
 	/* Send a FIN if requested, but only if there's no more data to send */
 	if (m == NULL && toep->flags & TPF_SEND_FIN)
 		close_conn(sc, toep);
 }
 
 /*
  * Dequeue and free whole packets from q until plen bytes (counted by each
  * packet's m_pkthdr.len) have been accounted for.  plen is expected to cover
  * complete packets only; this is asserted, not handled.
  */
 static inline void
 rqdrop_locked(struct mbufq *q, int plen)
 {
 	struct mbuf *m;
 
 	if (plen <= 0)
 		return;
 
 	do {
 		m = mbufq_dequeue(q);
 
 		/* The queue ran dry before plen was used up: too many credits. */
 		MPASS(m != NULL);
 		M_ASSERTPKTHDR(m);
 
 		/* plen must not end in the middle of a packet. */
 		MPASS(plen >= m->m_pkthdr.len);
 
 		plen -= m->m_pkthdr.len;
 		m_freem(m);
 	} while (plen > 0);
 }
 
 /*
  * Send the PDUs queued on toep->ulp_pduq to the peer, and a FIN afterwards if
  * one has been requested and the queue is empty.  Only valid for connections
  * in ULP_MODE_ISCSI (asserted).
  *
  * Every PDU goes out whole in a single work request; if the available credits
  * cannot carry the PDU at the head of the queue, tx is suspended and the PDU
  * stays queued.  A PDU that has been sent is moved to toep->ulp_pdu_reclaimq.
  *
  * drop is the number of bytes to release from ulp_pdu_reclaimq before sending
  * anything.  The caller must hold the inp write lock (asserted below).
  */
 void
 t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop)
 {
 	struct mbuf *sndptr, *m;
 	struct fw_ofld_tx_data_wr *txwr;
 	struct wrqe *wr;
 	u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf;
 	u_int adjusted_plen, ulp_submode;
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = intotcpcb(inp);
 	int tx_credits, shove;
 	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 	struct mbufq *pduq = &toep->ulp_pduq;
 	/* Extra sequence space used by a PDU, indexed by its ULP submode. */
 	static const u_int ulp_extra_len[] = {0, 4, 4, 8};
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
 	KASSERT(toep->ulp_mode == ULP_MODE_ISCSI,
 	    ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep));
 
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
 		return;
 
 	/*
 	 * This function doesn't resume by itself.  Someone else must clear the
 	 * flag and call this function.
 	 */
 	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
 		KASSERT(drop == 0,
 		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
 		return;
 	}
 
 	if (drop)
 		rqdrop_locked(&toep->ulp_pdu_reclaimq, drop);
 
 	/* The PDU is only peeked at here; it is dequeued once its WR is built. */
 	while ((sndptr = mbufq_first(pduq)) != NULL) {
 		M_ASSERTPKTHDR(sndptr);
 
 		/* Per-WR limits derived from the credits currently available. */
 		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
 		max_imm = max_imm_payload(tx_credits);
 		max_nsegs = max_dsgl_nsegs(tx_credits);
 
 		plen = 0;
 		nsegs = 0;
 		max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
 		for (m = sndptr; m != NULL; m = m->m_next) {
 			int n = sglist_count(mtod(m, void *), m->m_len);
 
 			nsegs += n;
 			plen += m->m_len;
 
 			/*
 			 * This mbuf would send us _over_ the nsegs limit.
 			 * Suspend tx because the PDU can't be sent out.
 			 */
 			if (plen > max_imm && nsegs > max_nsegs) {
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 
 			if (max_nsegs_1mbuf < n)
 				max_nsegs_1mbuf = n;
 		}
 
 		if (__predict_false(toep->flags & TPF_FIN_SENT))
 			panic("%s: excess tx.", __func__);
 
 		/*
 		 * We have a PDU to send.  All of it goes out in one WR so 'm'
 		 * is NULL.  A PDU's length is always a multiple of 4.
 		 */
 		MPASS(m == NULL);
 		MPASS((plen & 3) == 0);
 		MPASS(sndptr->m_pkthdr.len == plen);
 
 		shove = !(tp->t_flags & TF_MORETOCOME);
 		ulp_submode = mbuf_ulp_submode(sndptr);
 		MPASS(ulp_submode < nitems(ulp_extra_len));
 
 		/*
 		 * plen doesn't include header and data digests, which are
 		 * generated and inserted in the right places by the TOE, but
 		 * they do occupy TCP sequence space and need to be accounted
 		 * for.
 		 */
 		adjusted_plen = plen + ulp_extra_len[ulp_submode];
 		if (plen <= max_imm) {
 
 			/* Immediate data tx */
 
 			wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16),
 					toep->ofld_txq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr->wr_len, 16);
 			write_tx_wr(txwr, toep, plen, adjusted_plen, credits,
 			    shove, ulp_submode, sc->tt.tx_align);
 			/* The payload is copied in right after the WR header. */
 			m_copydata(sndptr, 0, plen, (void *)(txwr + 1));
 			nsegs = 0;
 		} else {
 			int wr_len;
 
 			/* DSGL tx */
 			wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) +
 			    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
 			wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
 				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
 			credits = howmany(wr_len, 16);
 			write_tx_wr(txwr, toep, 0, adjusted_plen, credits,
 			    shove, ulp_submode, sc->tt.tx_align);
 			write_tx_sgl(txwr + 1, sndptr, m, nsegs,
 			    max_nsegs_1mbuf);
 			/* Zero the 8 bytes of padding added by roundup2(). */
 			if (wr_len & 0xf) {
 				uint64_t *pad = (uint64_t *)
 				    ((uintptr_t)txwr + wr_len);
 				*pad = 0;
 			}
 		}
 
 		KASSERT(toep->tx_credits >= credits,
 			("%s: not enough credits", __func__));
 
 		/* The WR is ready: move the PDU over to the reclaim queue. */
 		m = mbufq_dequeue(pduq);
 		MPASS(m == sndptr);
 		mbufq_enqueue(&toep->ulp_pdu_reclaimq, m);
 
 		toep->tx_credits -= credits;
 		toep->tx_nocompl += credits;
 		toep->plen_nocompl += plen;
 		/* Ask for a completion when tx credits are running low. */
 		if (toep->tx_credits <= toep->tx_total * 3 / 8 &&
 		    toep->tx_nocompl >= toep->tx_total / 4) {
 			txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL);
 			toep->tx_nocompl = 0;
 			toep->plen_nocompl = 0;
 		}
 
 		/* Sequence space advances by the digest-inclusive length. */
 		tp->snd_nxt += adjusted_plen;
 		tp->snd_max += adjusted_plen;
 
 		toep->flags |= TPF_TX_DATA_SENT;
 		if (toep->tx_credits < MIN_OFLD_TX_CREDITS)
 			toep->flags |= TPF_TX_SUSPENDED;
 
 		/*
 		 * Record this WR's payload length (without digests) and credits
 		 * in the next tx descriptor of the ring indexed by txsd_pidx.
 		 */
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
 		txsd->plen = plen;
 		txsd->tx_credits = credits;
 		txsd++;
 		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
 			toep->txsd_pidx = 0;
 			txsd = &toep->txsd[0];
 		}
 		toep->txsd_avail--;
 
 		t4_l2t_send(sc, wr, toep->l2te);
 	}
 
 	/* Send a FIN if requested, but only if there are no more PDUs to send */
 	if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN)
 		close_conn(sc, toep);
 }
 
 /*
  * Push out whatever is pending for the offloaded connection behind tp.  iSCSI
  * connections send PDUs; everything else sends from the socket buffer.
  * Always returns 0.
  */
 int
 t4_tod_output(struct toedev *tod, struct tcpcb *tp)
 {
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = tod->tod_softc;
 #ifdef INVARIANTS
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
 	if (toep->ulp_mode != ULP_MODE_ISCSI) {
 		t4_push_frames(sc, toep, 0);
 		return (0);
 	}
 
 	t4_push_pdus(sc, toep, 0);
 	return (0);
 }
 
 /*
  * Note that a FIN has been requested for the connection behind tp.  If the
  * connection is at least ESTABLISHED the tx path is run right away; it sends
  * the FIN once nothing else is left to transmit.  Always returns 0.
  */
 int
 t4_send_fin(struct toedev *tod, struct tcpcb *tp)
 {
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = tod->tod_softc;
 #ifdef INVARIANTS
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
 	toep->flags |= TPF_SEND_FIN;
 
 	/* Not established yet: only the flag is recorded. */
 	if (tp->t_state < TCPS_ESTABLISHED)
 		return (0);
 
 	if (toep->ulp_mode != ULP_MODE_ISCSI)
 		t4_push_frames(sc, toep, 0);
 	else
 		t4_push_pdus(sc, toep, 0);
 
 	return (0);
 }
 
 /*
  * Reset the offloaded connection behind tp by handing it to send_reset().
  * The flowc work request must already have been sent (asserted).  Always
  * returns 0.
  */
 int
 t4_send_rst(struct toedev *tod, struct tcpcb *tp)
 {
 	struct toepcb *toep = tp->t_toe;
 	struct adapter *sc = tod->tod_softc;
 #ifdef INVARIANTS
 	struct inpcb *inp = tp->t_inpcb;
 #endif
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
 	/* hmmmm */
 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc for tid %u [%s] not sent already",
 	    __func__, toep->tid, tcpstates[tp->t_state]));
 
 	send_reset(sc, toep, 0);
 
 	return (0);
 }
 
 /*
  * Peer has sent us a FIN.
  *
  * Handler for CPL_PEER_CLOSE.  No payload is expected (m must be NULL).
  * Marks the socket as unable to receive more data and advances the TCP state
  * to account for the peer's FIN.  Always returns 0.
  */
 static int
 do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_peer_close *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = NULL;
 	struct socket *so;
 	struct sockbuf *sb;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PEER_CLOSE,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
 	/*
 	 * The tid still belongs to a syn queue entry.  Nothing is done for it
 	 * here apart from a sanity check under INVARIANTS and a trace record.
 	 */
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
 #ifdef INVARIANTS
 		struct synq_entry *synqe = (void *)toep;
 
 		INP_WLOCK(synqe->lctx->inp);
 		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
 			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
 			    ("%s: listen socket closed but tid %u not aborted.",
 			    __func__, tid));
 		} else {
 			/*
 			 * do_pass_accept_req is still running and will
 			 * eventually take care of this tid.
 			 */
 		}
 		INP_WUNLOCK(synqe->lctx->inp);
 #endif
 		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
 		    toep, toep->flags);
 		return (0);
 	}
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
 	CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
 	    tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp);
 
 	/* Nothing to do if the connection is already being aborted. */
 	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		goto done;
 
 	tp->rcv_nxt++;	/* FIN */
 
 	so = inp->inp_socket;
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 	/* A DDP buffer is still active; let the DDP code deal with it first. */
 	if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) {
 		handle_ddp_close(toep, tp, sb, cpl->rcv_nxt);
 	}
 	socantrcvmore_locked(so);	/* unlocks the sockbuf */
 
 	/* The rcv_nxt cross-check is skipped for RDMA connections. */
 	if (toep->ulp_mode != ULP_MODE_RDMA) {
 		KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt),
 	    		("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
 	    		be32toh(cpl->rcv_nxt)));
 	}
 
 	switch (tp->t_state) {
 	case TCPS_SYN_RECEIVED:
 		tp->t_starttime = ticks;
 		/* FALLTHROUGH */ 
 
 	case TCPS_ESTABLISHED:
 		tp->t_state = TCPS_CLOSE_WAIT;
 		break;
 
 	case TCPS_FIN_WAIT_1:
 		tp->t_state = TCPS_CLOSING;
 		break;
 
 	case TCPS_FIN_WAIT_2:
 		/*
 		 * The inp lock is asserted to be released once tcp_twstart()
 		 * is done, so it is taken again before final_cpl_received().
 		 */
 		tcp_twstart(tp);
 		INP_UNLOCK_ASSERT(inp);	 /* safe, we have a ref on the inp */
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		INP_WLOCK(inp);
 		final_cpl_received(toep);
 		return (0);
 
 	default:
 		log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n",
 		    __func__, tid, tp->t_state);
 	}
 done:
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (0);
 }
 
 /*
  * Peer has ACK'd our FIN.
  *
  * Handler for CPL_CLOSE_CON_RPL.  No payload is expected (m must be NULL).
  * Updates snd_una from the reply and advances the TCP state.  Always returns
  * 0.
  */
 static int
 do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = NULL;
 	struct socket *so = NULL;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_CLOSE_CON_RPL,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	tp = intotcpcb(inp);
 
 	CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x",
 	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags);
 
 	/* Nothing to do if the connection is already being aborted. */
 	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		goto done;
 
 	so = inp->inp_socket;
 	tp->snd_una = be32toh(cpl->snd_nxt) - 1;	/* exclude FIN */
 
 	switch (tp->t_state) {
 	case TCPS_CLOSING:	/* see TCPS_FIN_WAIT_2 in do_peer_close too */
 		tcp_twstart(tp);
 		/*
 		 * Shared exit for the states that finish the connection.  It is
 		 * entered with the inp lock released (asserted), drops the
 		 * tcbinfo lock, and takes the inp lock again before calling
 		 * final_cpl_received().
 		 */
 release:
 		INP_UNLOCK_ASSERT(inp);	/* safe, we have a ref on the  inp */
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		INP_WLOCK(inp);
 		final_cpl_received(toep);	/* no more CPLs expected */
 
 		return (0);
 	case TCPS_LAST_ACK:
 		/*
 		 * If tcp_close() hands back a tcpcb the inp is unlocked here
 		 * so that the release path's unlock assertion holds.
 		 */
 		if (tcp_close(tp))
 			INP_WUNLOCK(inp);
 		goto release;
 
 	case TCPS_FIN_WAIT_1:
 		/* The receive side is already shut down too: fully disconnected. */
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			soisdisconnected(so);
 		tp->t_state = TCPS_FIN_WAIT_2;
 		break;
 
 	default:
 		log(LOG_ERR,
 		    "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n",
 		    __func__, tid, tcpstates[tp->t_state]);
 	}
 done:
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (0);
 }
 
 /*
  * Build a CPL_ABORT_RPL for tid carrying rst_status as its command and
  * transmit it on ofld_txq.  Panics if the work request cannot be allocated.
  */
 void
 send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid,
     int rst_status)
 {
 	struct cpl_abort_rpl *cpl;
 	struct wrqe *wr = alloc_wrqe(sizeof(*cpl), ofld_txq);
 
 	/* XXX */
 	if (wr == NULL)
 		panic("%s: allocation failure.", __func__);
 
 	cpl = wrtod(wr);
 	INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid);
 	cpl->cmd = rst_status;
 
 	t4_wrq_tx(sc, wr);
 }
 
 /*
  * Translate the abort reason reported by the hardware into an errno value.
  * Reset-type reasons give EPIPE when the connection is in CLOSE_WAIT and
  * ECONNRESET otherwise, the timeout reasons give ETIMEDOUT, and anything
  * else gives EIO.
  */
 static int
 abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason)
 {
 	if (abort_reason == CPL_ERR_BAD_SYN ||
 	    abort_reason == CPL_ERR_CONN_RESET) {
 		if (tp->t_state == TCPS_CLOSE_WAIT)
 			return (EPIPE);
 		return (ECONNRESET);
 	}
 
 	if (abort_reason == CPL_ERR_XMIT_TIMEDOUT ||
 	    abort_reason == CPL_ERR_PERSIST_TIMEDOUT ||
 	    abort_reason == CPL_ERR_FINWAIT2_TIMEDOUT ||
 	    abort_reason == CPL_ERR_KEEPALIVE_TIMEDOUT)
 		return (ETIMEDOUT);
 
 	return (EIO);
 }
 
 /*
  * TCP RST from the peer, timeout, or some other such critical error.
  *
  * Handler for CPL_ABORT_REQ_RSS.  No payload is expected (m must be NULL).
  * Syn queue entries are passed on to do_abort_req_synqe() and negative advice
  * is ignored.  Otherwise a CPL_ABORT_RPL is always sent back, and the
  * connection is torn down here unless an abort was already in progress.
  * Returns 0, or whatever do_abort_req_synqe() returns.
  */
 static int
 do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	/* Read up front; it is needed for the reply after the teardown. */
 	struct sge_wrq *ofld_txq = toep->ofld_txq;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_REQ_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
 	if (toep->flags & TPF_SYNQE)
 		return (do_abort_req_synqe(iq, rss, m));
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	if (negative_advice(cpl->status)) {
 		CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)",
 		    __func__, cpl->status, tid, toep->flags);
 		return (0);	/* Ignore negative advice */
 	}
 
 	inp = toep->inp;
 	INP_INFO_RLOCK(&V_tcbinfo);	/* for tcp_close */
 	INP_WLOCK(inp);
 
 	tp = intotcpcb(inp);
 
 	CTR6(KTR_CXGBE,
 	    "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
 	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
 	    inp->inp_flags, cpl->status);
 
 	/*
 	 * If we'd initiated an abort earlier the reply to it is responsible for
 	 * cleaning up resources.  Otherwise we tear everything down right here
 	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
 	 */
 	if (toep->flags & TPF_ABORT_SHUTDOWN) {
 		INP_WUNLOCK(inp);
 		goto done;
 	}
 	toep->flags |= TPF_ABORT_SHUTDOWN;
 
 	/* Report the error and close, unless the inp is already gone. */
 	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
 		struct socket *so = inp->inp_socket;
 
 		if (so != NULL)
 			so_error_set(so, abort_status_to_errno(tp,
 			    cpl->status));
 		tp = tcp_close(tp);
 		if (tp == NULL)
 			INP_WLOCK(inp);	/* re-acquire */
 	}
 
 	/* Called with the inp lock held; the lock is not released here. */
 	final_cpl_received(toep);
 done:
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
 	return (0);
 }
 
 /*
  * Reply to the CPL_ABORT_REQ (send_reset)
  *
  * Handler for CPL_ABORT_RPL_RSS.  No payload is expected (m must be NULL).
  * Syn queue entries are passed on to do_abort_rpl_synqe().  For everything
  * else an abort must already be in progress (asserted) and this reply is
  * treated as the final CPL for the tid.  Returns 0, or whatever
  * do_abort_rpl_synqe() returns.
  */
 static int
 do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_RPL_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
 	if (toep->flags & TPF_SYNQE)
 		return (do_abort_rpl_synqe(iq, rss, m));
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d",
 	    __func__, tid, toep, inp, cpl->status);
 
 	KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
 	    ("%s: wasn't expecting abort reply", __func__));
 
 	/* Called with the inp lock held; the lock is not released here. */
 	INP_WLOCK(inp);
 	final_cpl_received(toep);
 
 	return (0);
 }
 
 /*
  * Handler for CPL_RX_DATA: payload has arrived for an offloaded connection.
  *
  * m starts with the CPL header, which is stripped; the rest is appended to
  * the socket's receive buffer.  The function also updates the receive
  * sequence/window state, autosizes the receive buffer, keeps the DDP state in
  * sync for ULP_MODE_TCPDDP connections, and returns rx credits when the
  * window gets small.  m is consumed in all cases.  Always returns 0.
  */
 static int
 do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_rx_data *cpl = mtod(m, const void *);
 	unsigned int tid = GET_TID(cpl);
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp;
 	struct socket *so;
 	struct sockbuf *sb;
 	int len;
 	uint32_t ddp_placed = 0;
 
 	/*
 	 * The tid still belongs to a syn queue entry: the data is discarded
 	 * after a sanity check (INVARIANTS only) and a trace record.
 	 */
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
 #ifdef INVARIANTS
 		struct synq_entry *synqe = (void *)toep;
 
 		INP_WLOCK(synqe->lctx->inp);
 		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
 			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
 			    ("%s: listen socket closed but tid %u not aborted.",
 			    __func__, tid));
 		} else {
 			/*
 			 * do_pass_accept_req is still running and will
 			 * eventually take care of this tid.
 			 */
 		}
 		INP_WUNLOCK(synqe->lctx->inp);
 #endif
 		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
 		    toep, toep->flags);
 		m_freem(m);
 		return (0);
 	}
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	/* strip off CPL header */
 	m_adj(m, sizeof(*cpl));
 	len = m->m_pkthdr.len;
 
 	INP_WLOCK(inp);
 	/* The connection is gone or in TIME_WAIT: discard the data. */
 	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
 		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
 		    __func__, tid, len, inp->inp_flags);
 		INP_WUNLOCK(inp);
 		m_freem(m);
 		return (0);
 	}
 
 	tp = intotcpcb(inp);
 
 	/*
 	 * A gap between our rcv_nxt and the sequence number in the CPL is
 	 * recorded as the number of bytes already placed by DDP.
 	 */
 	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
 		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;
 
 	tp->rcv_nxt += len;
 	KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
 	tp->rcv_wnd -= len;
 	tp->t_rcvtime = ticks;
 
 	so = inp_inpcbtosocket(inp);
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 
 	/*
 	 * Data arrived for a socket that can't receive any more: discard it
 	 * and drop the connection with ECONNRESET.  All locks are released
 	 * first and then the tcbinfo and inp locks are taken for tcp_drop().
 	 */
 	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
 		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
 		    __func__, tid, len);
 		m_freem(m);
 		SOCKBUF_UNLOCK(sb);
 		INP_WUNLOCK(inp);
 
 		INP_INFO_RLOCK(&V_tcbinfo);
 		INP_WLOCK(inp);
 		tp = tcp_drop(tp, ECONNRESET);
 		if (tp)
 			INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 
 		return (0);
 	}
 
 	/* receive buffer autosize */
 	if (sb->sb_flags & SB_AUTOSIZE &&
 	    V_tcp_do_autorcvbuf &&
 	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
 	    len > (sbspace(sb) / 8 * 7)) {
 		unsigned int hiwat = sb->sb_hiwat;
 		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
 		    V_tcp_autorcvbuf_max);
 
 		if (!sbreserve_locked(sb, newsize, so, NULL))
 			sb->sb_flags &= ~SB_AUTOSIZE;
 		else
 			toep->rx_credits += newsize - hiwat;
 	}
 
 	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
 		/* Non-zero when our DDP_ON flag disagrees with the CPL's ddp_off. */
 		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;
 
 		if (changed) {
 			/* A requested switch took effect: flip DDP_ON, clear the request. */
 			if (toep->ddp_flags & DDP_SC_REQ)
 				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
 			else {
 				KASSERT(cpl->ddp_off == 1,
 				    ("%s: DDP switched on by itself.",
 				    __func__));
 
 				/* Fell out of DDP mode */
 				toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE |
 				    DDP_BUF1_ACTIVE);
 
 				if (ddp_placed)
 					insert_ddp_data(toep, ddp_placed);
 			}
 		}
 
 		/* Allow DDP again once DDP_RETRY_WAIT has passed since it was disabled. */
 		if ((toep->ddp_flags & DDP_OK) == 0 &&
 		    time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) {
 			toep->ddp_score = DDP_LOW_SCORE;
 			toep->ddp_flags |= DDP_OK;
 			CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u",
 			    __func__, tid, time_uptime);
 		}
 
 		if (toep->ddp_flags & DDP_ON) {
 
 			/*
 			 * CPL_RX_DATA with DDP on can only be an indicate.  Ask
 			 * soreceive to post a buffer or disable DDP.  The
 			 * payload that arrived in this indicate is appended to
 			 * the socket buffer as usual.
 			 */
 
 #if 0
 			CTR5(KTR_CXGBE,
 			    "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)",
 			    __func__, tid, toep->flags, be32toh(cpl->seq), len);
 #endif
 			sb->sb_flags |= SB_DDP_INDICATE;
 		} else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK &&
 		    tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) {
 
 			/*
 			 * DDP allowed but isn't on (and a request to switch it
 			 * on isn't pending either), and conditions are ripe for
 			 * it to work.  Switch it on.
 			 */
 
 			enable_ddp(sc, toep);
 		}
 	}
 
 	/*
 	 * toep->sb_cc is the receive buffer usage as of the last update.
 	 * Whatever has been removed from the buffer since then is added to
 	 * the pending rx credits before the new data is appended.
 	 */
 	KASSERT(toep->sb_cc >= sbused(sb),
 	    ("%s: sb %p has more data (%d) than last time (%d).",
 	    __func__, sb, sbused(sb), toep->sb_cc));
 	toep->rx_credits += toep->sb_cc - sbused(sb);
 	sbappendstream_locked(sb, m, 0);
 	toep->sb_cc = sbused(sb);
 	/* Return credits if buffered data plus window is below the low-water mark. */
 	if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
 		int credits;
 
 		credits = send_rx_credits(sc, toep, toep->rx_credits);
 		toep->rx_credits -= credits;
 		tp->rcv_wnd += credits;
 		tp->rcv_adv += credits;
 	}
 	/* Wakes up readers; the sockbuf is unlocked afterwards (asserted). */
 	sorwakeup_locked(so);
 	SOCKBUF_UNLOCK_ASSERT(sb);
 
 	INP_WUNLOCK(inp);
 	return (0);
 }
 
 /*
  * Bit-field helpers for CPL_FW4_ACK.  For each field, S_ is the shift, M_ is
  * the mask (before shifting), V_() places a value into the field, and G_()
  * extracts the field from a word.  F_ is the single-bit field set to 1.
  */
 #define S_CPL_FW4_ACK_OPCODE    24
 #define M_CPL_FW4_ACK_OPCODE    0xff
 #define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE)
 #define G_CPL_FW4_ACK_OPCODE(x) \
     (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE)
 
 #define S_CPL_FW4_ACK_FLOWID    0
 #define M_CPL_FW4_ACK_FLOWID    0xffffff
 #define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID)
 #define G_CPL_FW4_ACK_FLOWID(x) \
     (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID)
 
 #define S_CPL_FW4_ACK_CR        24
 #define M_CPL_FW4_ACK_CR        0xff
 #define V_CPL_FW4_ACK_CR(x)     ((x) << S_CPL_FW4_ACK_CR)
 #define G_CPL_FW4_ACK_CR(x)     (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR)
 
 #define S_CPL_FW4_ACK_SEQVAL    0
 #define M_CPL_FW4_ACK_SEQVAL    0x1
 #define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL)
 #define G_CPL_FW4_ACK_SEQVAL(x) \
     (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL)
 #define F_CPL_FW4_ACK_SEQVAL    V_CPL_FW4_ACK_SEQVAL(1U)
 
 /*
  * Handler for CPL_FW4_ACK: tx credits are being returned for a tid.
  *
  * No payload is expected (m must be NULL).  The returned credits are matched
  * against the tx descriptors starting at txsd_cidx, which also yields the
  * number of payload bytes they covered.  If tx was suspended and enough
  * credits are now available, tx is resumed and the resumed tx routine drops
  * those bytes; otherwise they are dropped here from the send buffer and/or
  * the PDU reclaim queue.  snd_una is updated when the CPL carries a valid
  * sequence number.  Always returns 0.
  */
 static int
 do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_fw4_ack *cpl = (const void *)(rss + 1);
 	unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl)));
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct socket *so;
 	uint8_t credits = cpl->credits;
 	struct ofld_tx_sdesc *txsd;
 	int plen;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	/*
 	 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and
 	 * now this comes back carrying the credits for the flowc.
 	 */
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
 		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
 		    ("%s: credits for a synq entry %p", __func__, toep));
 		return (0);
 	}
 
 	inp = toep->inp;
 
 	KASSERT(opcode == CPL_FW4_ACK,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	INP_WLOCK(inp);
 
 	/* Credits for a connection that is being aborted are ignored. */
 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) {
 		INP_WUNLOCK(inp);
 		return (0);
 	}
 
 	KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0,
 	    ("%s: inp_flags 0x%x", __func__, inp->inp_flags));
 
 	tp = intotcpcb(inp);
 
 	/* The CPL's snd_una is used only when the SEQVAL flag is set. */
 	if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) {
 		tcp_seq snd_una = be32toh(cpl->snd_una);
 
 #ifdef INVARIANTS
 		if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) {
 			log(LOG_ERR,
 			    "%s: unexpected seq# %x for TID %u, snd_una %x\n",
 			    __func__, snd_una, toep->tid, tp->snd_una);
 		}
 #endif
 
 		if (tp->snd_una != snd_una) {
 			tp->snd_una = snd_una;
 			tp->ts_recent_age = tcp_ts_getticks();
 		}
 	}
 
 	so = inp->inp_socket;
 	txsd = &toep->txsd[toep->txsd_cidx];
 	plen = 0;
 	/*
 	 * Consume tx descriptors in order until the returned credits are used
 	 * up.  Credits must cover whole descriptors (asserted).  The ring
 	 * wraps around at txsd_total.
 	 */
 	while (credits) {
 		KASSERT(credits >= txsd->tx_credits,
 		    ("%s: too many (or partial) credits", __func__));
 		credits -= txsd->tx_credits;
 		toep->tx_credits += txsd->tx_credits;
 		plen += txsd->plen;
 		txsd++;
 		toep->txsd_avail++;
 		KASSERT(toep->txsd_avail <= toep->txsd_total,
 		    ("%s: txsd avail > total", __func__));
 		if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) {
 			txsd = &toep->txsd[0];
 			toep->txsd_cidx = 0;
 		}
 	}
 
 	/* Every credit is back, so nothing is waiting for a completion. */
 	if (toep->tx_credits == toep->tx_total) {
 		toep->tx_nocompl = 0;
 		toep->plen_nocompl = 0;
 	}
 
 	/*
 	 * Resume a suspended tx once at least a quarter of the credits are
 	 * available.  plen is handed over as the amount to drop.
 	 */
 	if (toep->flags & TPF_TX_SUSPENDED &&
 	    toep->tx_credits >= toep->tx_total / 4) {
 		toep->flags &= ~TPF_TX_SUSPENDED;
 		if (toep->ulp_mode == ULP_MODE_ISCSI)
 			t4_push_pdus(sc, toep, plen);
 		else
 			t4_push_frames(sc, toep, plen);
 	} else if (plen > 0) {
 		struct sockbuf *sb = &so->so_snd;
 		int sbu;
 
 		SOCKBUF_LOCK(sb);
 		sbu = sbused(sb);
 		if (toep->ulp_mode == ULP_MODE_ISCSI) {
 
 			if (__predict_false(sbu > 0)) {
 				/*
 				 * The data transmitted before the tid's ULP
 				 * mode changed to ISCSI is still in so_snd.
 				 * Incoming credits should account for so_snd
 				 * first.
 				 */
 				sbdrop_locked(sb, min(sbu, plen));
 				plen -= min(sbu, plen);
 			}
 			sowwakeup_locked(so);	/* unlocks so_snd */
 			/* Whatever is left of plen comes off the reclaim queue. */
 			rqdrop_locked(&toep->ulp_pdu_reclaimq, plen);
 		} else {
 			sbdrop_locked(sb, plen);
 			sowwakeup_locked(so);	/* unlocks so_snd */
 		}
 		SOCKBUF_UNLOCK_ASSERT(sb);
 	}
 
 	INP_WUNLOCK(inp);
 
 	return (0);
 }
 
 /*
  * Handler for CPL_SET_TCB_RPL.  Replies for filter tids are handed to
  * t4_filter_rpl() and its result is returned.  No other reply is expected:
  * it causes a panic under INVARIANTS and is logged otherwise, with 0 returned.
  */
 static int
 do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
 	struct adapter *sc = iq->adapter;
 	unsigned int tid = GET_TID(cpl);
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_SET_TCB_RPL,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
 	if (!is_ftid(sc, tid)) {
 		/*
 		 * TOM and/or other ULPs don't request replies for CPL_SET_TCB
 		 * or CPL_SET_TCB_FIELD requests.  If that ever changes, the
 		 * code to dispatch those replies belongs here.
 		 */
 #ifdef INVARIANTS
 		panic("%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p", __func__,
 		    tid, iq);
 #else
 		log(LOG_ERR, "%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p\n",
 		    __func__, tid, iq);
 #endif
 
 		return (0);
 	}
 
 	/* TCB is a filter */
 	return (t4_filter_rpl(iq, rss, m));
 }
 
 /*
  * Send a CPL_SET_TCB_FIELD for toep's tid that applies mask/val to the given
  * TCB word.  No reply is requested.  The request goes out on the connection's
  * control queue when ctrl is non-zero and on its offload tx queue otherwise.
  * Panics if the work request cannot be allocated.
  */
 void
 t4_set_tcb_field(struct adapter *sc, struct toepcb *toep, int ctrl,
     uint16_t word, uint64_t mask, uint64_t val)
 {
 	struct sge_wrq *wrq = ctrl ? toep->ctrlq : toep->ofld_txq;
 	struct cpl_set_tcb_field *req;
 	struct wrqe *wr;
 
 	wr = alloc_wrqe(sizeof(*req), wrq);
 	/* XXX */
 	if (wr == NULL)
 		panic("%s: allocation failure.", __func__);
 
 	req = wrtod(wr);
 	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
 	req->reply_ctrl = htobe16(V_NO_REPLY(1) |
 	    V_QUEUENO(toep->ofld_rxq->iq.abs_id));
 	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0));
 	req->mask = htobe64(mask);
 	req->val = htobe64(val);
 
 	t4_wrq_tx(sc, wr);
 }
 
 void
 t4_init_cpl_io_handlers(struct adapter *sc)
 {
 
 	t4_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close);
 	t4_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl);
 	t4_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req);
 	t4_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl);
 	t4_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data);
 	t4_register_cpl_handler(sc, CPL_FW4_ACK, do_fw4_ack);
 	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl);
 }
 
 /*
  * Point the CPL_SET_TCB_RPL handler at t4_filter_rpl.  This is the only
  * registration changed here; the other handlers installed by
  * t4_init_cpl_io_handlers are not touched.
  */
 void
 t4_uninit_cpl_io_handlers(struct adapter *sc)
 {
 
 	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
 }
 #endif
Index: projects/release-pkg/sys/dev/cxgbe/tom/t4_listen.c
===================================================================
--- projects/release-pkg/sys/dev/cxgbe/tom/t4_listen.c	(revision 293335)
+++ projects/release-pkg/sys/dev/cxgbe/tom/t4_listen.c	(revision 293336)
@@ -1,1598 +1,1598 @@
 /*-
  * Copyright (c) 2012 Chelsio Communications, Inc.
  * All rights reserved.
  * Written by: Navdeep Parhar <np@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #ifdef TCP_OFFLOAD
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/module.h>
 #include <sys/protosw.h>
 #include <sys/refcount.h>
 #include <sys/domain.h>
 #include <sys/fnv_hash.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 #include <net/route.h>
 #include <netinet/in.h>
+#include <netinet/in_fib.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
+#include <netinet6/in6_fib.h>
 #include <netinet6/scope6_var.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/toecore.h>
 
 #include "common/common.h"
 #include "common/t4_msg.h"
 #include "common/t4_regs.h"
 #include "tom/t4_tom_l2t.h"
 #include "tom/t4_tom.h"
 
 /* stid services */
 static int alloc_stid(struct adapter *, struct listen_ctx *, int);
 static struct listen_ctx *lookup_stid(struct adapter *, int);
 static void free_stid(struct adapter *, struct listen_ctx *);
 
 /* lctx services */
 static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
     struct vi_info *);
 static int free_lctx(struct adapter *, struct listen_ctx *);
 static void hold_lctx(struct listen_ctx *);
 static void listen_hash_add(struct adapter *, struct listen_ctx *);
 static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
 static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
 static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);
 
 static inline void save_qids_in_mbuf(struct mbuf *, struct vi_info *);
 static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *);
 static void send_reset_synqe(struct toedev *, struct synq_entry *);
 
 /*
  * Reserve stids for lctx: one when isipv6 is zero, two naturally aligned ones
  * otherwise.  lctx's stid_region is linked into t->stids and lctx is stored in
  * stid_tab at the first reserved index.
  *
  * Returns the first reserved stid with t->stid_base added, or -1 if the
  * request cannot be satisfied.  t->stid_lock is taken and dropped internally.
  */
 static int
 alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6)
 {
 	struct tid_info *t = &sc->tids;
 	u_int stid, n, f, mask;
 	struct stid_region *sr = &lctx->stid_region;
 
 	/*
 	 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in
 	 * the TCAM.  The start of the stid region is properly aligned (the chip
 	 * requires each region to be 128-cell aligned).
 	 */
 	n = isipv6 ? 2 : 1;
 	mask = n - 1;
 	KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0,
 	    ("%s: stid region (%u, %u) not properly aligned.  n = %u",
 	    __func__, t->stid_base, t->nstids, n));
 
 	mtx_lock(&t->stid_lock);
 	/* Fail early if fewer than n stids are unused in total. */
 	if (n > t->nstids - t->stids_in_use) {
 		mtx_unlock(&t->stid_lock);
 		return (-1);
 	}
 
 	if (t->nstids_free_head >= n) {
 		/*
 		 * This allocation will definitely succeed because the region
 		 * starts at a good alignment and we just checked we have enough
 		 * stids free.
 		 */
 		/*
 		 * Take the stids from the end of the free run at the head.
 		 * f is the number of stids left unused right after the new
 		 * region so that the region starts on an n-aligned index.
 		 */
 		f = t->nstids_free_head & mask;
 		t->nstids_free_head -= n + f;
 		stid = t->nstids_free_head;
 		TAILQ_INSERT_HEAD(&t->stids, sr, link);
 	} else {
 		struct stid_region *s;
 
 		/*
 		 * Walk the regions in list order.  After the addition below
 		 * stid is the index just past region s (its used stids plus
 		 * the free ones that follow them).  The new region is carved
 		 * out of the end of s's free run if that run is large enough.
 		 */
 		stid = t->nstids_free_head;
 		TAILQ_FOREACH(s, &t->stids, link) {
 			stid += s->used + s->free;
 			f = stid & mask;
 			if (s->free >= n + f) {
 				stid -= n + f;
 				s->free -= n + f;
 				TAILQ_INSERT_AFTER(&t->stids, s, sr, link);
 				goto allocated;
 			}
 		}
 
 		/* The walk must have accounted for every stid. */
 		if (__predict_false(stid != t->nstids)) {
 			panic("%s: stids TAILQ (%p) corrupt."
 			    "  At %d instead of %d at the end of the queue.",
 			    __func__, &t->stids, stid, t->nstids);
 		}
 
 		mtx_unlock(&t->stid_lock);
 		return (-1);
 	}
 
 allocated:
 	sr->used = n;
 	sr->free = f;
 	t->stids_in_use += n;
 	t->stid_tab[stid] = lctx;
 	mtx_unlock(&t->stid_lock);
 
 	KASSERT(((stid + t->stid_base) & mask) == 0,
 	    ("%s: EDOOFUS.", __func__));
 	return (stid + t->stid_base);
 }
 
 /*
  * Return the stid_tab entry for stid, where stid includes stid_base (the form
  * returned by alloc_stid).  No range check or locking is done here.
  */
 static struct listen_ctx *
 lookup_stid(struct adapter *sc, int stid)
 {
 
 	return (sc->tids.stid_tab[stid - sc->tids.stid_base]);
 }
 
 /*
  * Give back the stids held by lctx's stid_region.  The region's used and free
  * counts are added to the free count of the previous region in t->stids, or
  * to nstids_free_head when there is no previous region, and the region is
  * then unlinked.  The stid_tab entry is not modified here.
  */
 static void
 free_stid(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct tid_info *t = &sc->tids;
 	struct stid_region *sr = &lctx->stid_region;
 	struct stid_region *prev;
 
 	KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used));
 
 	mtx_lock(&t->stid_lock);
 
 	prev = TAILQ_PREV(sr, stid_head, link);
 	if (prev == NULL)
 		t->nstids_free_head += sr->used + sr->free;
 	else
 		prev->free += sr->used + sr->free;
 
 	KASSERT(t->stids_in_use >= sr->used,
 	    ("%s: stids_in_use (%u) < stids being freed (%u)", __func__,
 	    t->stids_in_use, sr->used));
 	t->stids_in_use -= sr->used;
 
 	TAILQ_REMOVE(&t->stids, sr, link);
 
 	mtx_unlock(&t->stid_lock);
 }
 
 /*
  * Allocate and initialize a listen_ctx for inp using vi's queues.
  *
  * Reserves stid(s) via alloc_stid, takes a hold on the local IPv6 address via
  * hold_lip when inp is an IPv6 socket bound to something other than
  * in6addr_any, and takes a reference on inp (in_pcbref).  The lctx starts
  * with a refcount of 1 and an empty synq.
  *
  * Returns NULL on any failure, with everything acquired so far released.
  * The caller must hold inp's write lock (asserted).
  */
 static struct listen_ctx *
 alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi)
 {
 	struct listen_ctx *lctx;
 
 	INP_WLOCK_ASSERT(inp);
 
 	lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO);
 	if (lctx == NULL)
 		return (NULL);
 
 	lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6);
 	if (lctx->stid < 0) {
 		free(lctx, M_CXGBE);
 		return (NULL);
 	}
 
 	if (inp->inp_vflag & INP_IPV6 &&
 	    !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
 		struct tom_data *td = sc->tom_softc;
 
 		lctx->ce = hold_lip(td, &inp->in6p_laddr);
 		if (lctx->ce == NULL) {
 			/*
 			 * alloc_stid linked lctx->stid_region into the stid
 			 * list.  It must be unlinked (and the stids returned)
 			 * before the memory that contains it is freed.
 			 */
 			free_stid(sc, lctx);
 			free(lctx, M_CXGBE);
 			return (NULL);
 		}
 	}
 
 	lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id];
 	lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq];
 	refcount_init(&lctx->refcount, 1);
 	TAILQ_INIT(&lctx->synq);
 
 	lctx->inp = inp;
 	in_pcbref(inp);
 
 	return (lctx);
 }
 
 /*
  * Don't call this directly, use release_lctx instead.
  *
  * Destroys an lctx whose refcount has reached 0 and whose synq is empty
  * (both asserted): drops the hold on the local address if one was taken,
  * returns the stid(s), frees the lctx, and finally drops the inp reference
  * with in_pcbrele_wlocked.  The return value is whatever in_pcbrele_wlocked
  * returns; release_lctx treats non-zero as "inp was freed".
  */
 static int
 free_lctx(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct inpcb *inp = lctx->inp;
 	struct tom_data *td = sc->tom_softc;
 
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(lctx->refcount == 0,
 	    ("%s: refcount %d", __func__, lctx->refcount));
 	KASSERT(TAILQ_EMPTY(&lctx->synq),
 	    ("%s: synq not empty.", __func__));
 	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
 
 	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
 	    __func__, lctx->stid, lctx, lctx->inp);
 
 	if (lctx->ce)
 		release_lip(td, lctx->ce);
 	free_stid(sc, lctx);
 	free(lctx, M_CXGBE);
 
 	/* inp was saved in a local above; lctx must not be used past here. */
 	return (in_pcbrele_wlocked(inp));
 }
 
 /*
  * Take an additional reference on lctx.  Paired with release_lctx.
  */
 static void
 hold_lctx(struct listen_ctx *lctx)
 {
 
 	refcount_acquire(&lctx->refcount);
 }
 
 /*
  * Hash the value of the key pointer itself (not what it points to) with
  * FNV-1 and reduce it with mask to get a listen hash bucket index.
  */
 static inline uint32_t
 listen_hashfn(void *key, u_long mask)
 {
 	uint32_t hash;
 
 	hash = fnv_32_buf(&key, sizeof(key), FNV1_32_INIT);
 
 	return (hash & mask);
 }
 
 /*
  * Add a listen_ctx entry to the listen hash table.  The bucket is chosen by
  * hashing lctx->inp; the insertion and the lctx_count update are done under
  * lctx_hash_lock.
  */
 static void
 listen_hash_add(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct tom_data *td = sc->tom_softc;
 	int bucket = listen_hashfn(lctx->inp, td->listen_mask);
 
 	mtx_lock(&td->lctx_hash_lock);
 	LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
 	td->lctx_count++;
 	mtx_unlock(&td->lctx_hash_lock);
 }
 
 /*
  * Look for the listening socket's context entry in the hash and return it.
  * Returns NULL if no entry in inp's bucket has a matching inp.  No reference
  * is taken on the returned lctx and lctx_hash_lock is dropped before return.
  */
 static struct listen_ctx *
 listen_hash_find(struct adapter *sc, struct inpcb *inp)
 {
 	struct tom_data *td = sc->tom_softc;
 	int bucket = listen_hashfn(inp, td->listen_mask);
 	struct listen_ctx *lctx;
 
 	mtx_lock(&td->lctx_hash_lock);
 	/* lctx is NULL after the loop if it ran to the end without a match. */
 	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
 		if (lctx->inp == inp)
 			break;
 	}
 	mtx_unlock(&td->lctx_hash_lock);
 
 	return (lctx);
 }
 
 /*
  * Removes the listen_ctx structure for inp from the hash and returns it.
  * Returns NULL (and changes nothing) if inp has no entry in its bucket.  The
  * lctx itself is not released here; only the hash linkage and lctx_count are
  * updated.
  */
 static struct listen_ctx *
 listen_hash_del(struct adapter *sc, struct inpcb *inp)
 {
 	struct tom_data *td = sc->tom_softc;
 	int bucket = listen_hashfn(inp, td->listen_mask);
 	struct listen_ctx *lctx, *l;
 
 	mtx_lock(&td->lctx_hash_lock);
 	/* lctx is NULL after the loop if it ran to the end without a match. */
 	LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
 		if (lctx->inp == inp) {
 			LIST_REMOVE(lctx, link);
 			td->lctx_count--;
 			break;
 		}
 	}
 	mtx_unlock(&td->lctx_hash_lock);
 
 	return (lctx);
 }
 
 /*
  * Drop one reference on the lctx.  The listening socket's inp must be
  * write-locked by the caller.  When the last reference goes away the lctx is
  * destroyed via free_lctx, which may free the inp too; NULL is returned in
  * that case and the inp is returned otherwise.
  */
 static struct inpcb *
 release_lctx(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct inpcb *inp = lctx->inp;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/* Not the last reference: the lctx and the inp both stay. */
 	if (!refcount_release(&lctx->refcount))
 		return (inp);
 
 	if (free_lctx(sc, lctx))
 		return (NULL);
 	return (inp);
 }
 
 /*
  * Abort the embryonic connection tracked by synqe.  A single wrqe carrying a
  * FW_FLOWC_WR followed by a CPL_ABORT_REQ (CPL_ABORT_SEND_RST) is built for
  * synqe->tid and handed to t4_l2t_send using synqe's L2 table entry.
  *
  * Does nothing if TPF_ABORT_SHUTDOWN is already set on the synqe; otherwise
  * sets it, and sets TPF_FLOWC_WR_SENT as well.  The caller must hold the
  * listening socket's inp write lock (asserted).  Panics if the wrqe cannot be
  * allocated.
  */
 static void
 send_reset_synqe(struct toedev *tod, struct synq_entry *synqe)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct mbuf *m = synqe->syn;
 	struct ifnet *ifp = m->m_pkthdr.rcvif;
 	struct vi_info *vi = ifp->if_softc;
 	struct port_info *pi = vi->pi;
 	struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
 	struct wrqe *wr;
 	struct fw_flowc_wr *flowc;
 	struct cpl_abort_req *req;
 	int txqid, rxqid, flowclen;
 	struct sge_wrq *ofld_txq;
 	struct sge_ofld_rxq *ofld_rxq;
 	const int nparams = 6;
 	unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN;
 
 	INP_WLOCK_ASSERT(synqe->lctx->inp);
 
 	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
 	    __func__, synqe, synqe->flags, synqe->tid,
 	    synqe->flags & TPF_ABORT_SHUTDOWN ?
 	    " (abort already in progress)" : "");
 	if (synqe->flags & TPF_ABORT_SHUTDOWN)
 		return;	/* abort already in progress */
 	synqe->flags |= TPF_ABORT_SHUTDOWN;
 
 	/* The queue ids were stashed in the SYN mbuf's flowid. */
 	get_qids_from_mbuf(m, &txqid, &rxqid);
 	ofld_txq = &sc->sge.ofld_txq[txqid];
 	ofld_rxq = &sc->sge.ofld_rxq[rxqid];
 
 	/* The wrqe will have two WRs - a flowc followed by an abort_req */
 	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
 
 	wr = alloc_wrqe(roundup2(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq);
 	if (wr == NULL) {
 		/* XXX */
 		panic("%s: allocation failure.", __func__);
 	}
 	flowc = wrtod(wr);
 	/* The abort request starts at the next EQ_ESIZE boundary. */
 	req = (void *)((caddr_t)flowc + roundup2(flowclen, EQ_ESIZE));
 
 	/* First the flowc ... */
 	memset(flowc, 0, wr->wr_len);
 	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
 	    V_FW_FLOWC_WR_NPARAMS(nparams));
 	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
 	    V_FW_WR_FLOWID(synqe->tid));
 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 	flowc->mnemval[0].val = htobe32(pfvf);
 	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
 	flowc->mnemval[1].val = htobe32(pi->tx_chan);
 	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
 	flowc->mnemval[2].val = htobe32(pi->tx_chan);
 	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
 	flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
  	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
  	flowc->mnemval[4].val = htobe32(512);
  	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
  	flowc->mnemval[5].val = htobe32(512);
 	synqe->flags |= TPF_FLOWC_WR_SENT;
 
 	/* ... then ABORT request */
 	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
 	req->rsvd0 = 0;	/* don't have a snd_nxt */
 	req->rsvd1 = 1;	/* no data sent yet */
 	req->cmd = CPL_ABORT_SEND_RST;
 
 	t4_l2t_send(sc, wr, e);
 }
 
 /*
  * Send a CPL_PASS_OPEN_REQ for lctx's stid on lctx->ctrlq, asking the
  * hardware to listen on the inp's local IPv4 address and port.  SYNs are
  * steered to lctx->ofld_rxq and the connection policy is
  * CPL_CONN_POLICY_ASK.
  *
  * Returns 0 once the request has been handed to t4_wrq_tx, or ENOMEM if the
  * work request could not be allocated.
  */
 static int
 create_server(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct wrqe *wr;
 	struct cpl_pass_open_req *req;
 	struct inpcb *inp = lctx->inp;
 
 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
 	if (wr == NULL) {
 		/* Terminate the line like every other log() call here. */
 		log(LOG_ERR, "%s: allocation failure\n", __func__);
 		return (ENOMEM);
 	}
 	req = wrtod(wr);
 
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
 	/* Port and address are copied as stored in the inp, no byte swap. */
 	req->local_port = inp->inp_lport;
 	req->peer_port = 0;
 	req->local_ip = inp->inp_laddr.s_addr;
 	req->peer_ip = 0;
 	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
 	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
 	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
 
 	t4_wrq_tx(sc, wr);
 	return (0);
 }
 
 /*
  * IPv6 counterpart of create_server: send a CPL_PASS_OPEN_REQ6 for lctx's
  * stid on lctx->ctrlq, asking the hardware to listen on the inp's local IPv6
  * address and port.  SYNs are steered to lctx->ofld_rxq and the connection
  * policy is CPL_CONN_POLICY_ASK.
  *
  * Returns 0 once the request has been handed to t4_wrq_tx, or ENOMEM if the
  * work request could not be allocated.
  */
 static int
 create_server6(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct wrqe *wr;
 	struct cpl_pass_open_req6 *req;
 	struct inpcb *inp = lctx->inp;
 
 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
 	if (wr == NULL) {
 		/* Terminate the line like every other log() call here. */
 		log(LOG_ERR, "%s: allocation failure\n", __func__);
 		return (ENOMEM);
 	}
 	req = wrtod(wr);
 
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
 	req->local_port = inp->inp_lport;
 	req->peer_port = 0;
 	/* The 16-byte address is copied as two 64-bit halves, no byte swap. */
 	req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
 	req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
 	req->peer_ip_hi = 0;
 	req->peer_ip_lo = 0;
 	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
 	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
 	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
 
 	t4_wrq_tx(sc, wr);
 	return (0);
 }
 
 /*
  * Send a CPL_CLOSE_LISTSRV_REQ for lctx's stid on lctx->ctrlq, with the reply
  * directed at lctx->ofld_rxq.  Always returns 0; panics if the work request
  * cannot be allocated.
  */
 static int
 destroy_server(struct adapter *sc, struct listen_ctx *lctx)
 {
 	struct cpl_close_listsvr_req *req;
 	struct wrqe *wr;
 
 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
 	if (wr == NULL)
 		panic("%s: allocation failure.", __func__);	/* XXX */
 
 	req = wrtod(wr);
 	INIT_TP_WR(req, 0);
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
 	    lctx->stid));
 	req->rsvd = htobe16(0);
 	req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
 
 	t4_wrq_tx(sc, wr);
 
 	return (0);
 }
 
 /*
  * Start a listening server by sending a passive open request to HW.
  *
  * Can't take adapter lock here and access to sc->flags,
  * sc->offload_map, if_capenable are all race prone.
  *
  * The caller must hold the inp's write lock (asserted).  Returns 0 in every
  * case.  No hardware listener is started when the local address is a
  * loopback address, when no running VI has IFCAP_TOE enabled, when the inp
  * already has an entry in the listen hash, or when allocating the lctx or
  * sending the request fails (the last two are logged).
  */
 int
 t4_listen_start(struct toedev *tod, struct tcpcb *tp)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct vi_info *vi;
 	struct port_info *pi;
 	struct inpcb *inp = tp->t_inpcb;
 	struct listen_ctx *lctx;
 	int i, rc, v;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/* Don't start a hardware listener for any loopback address. */
 	if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
 		return (0);
 	if (!(inp->inp_vflag & INP_IPV6) &&
 	    IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
 		return (0);
 #if 0
 	ADAPTER_LOCK(sc);
 	if (IS_BUSY(sc)) {
 		log(LOG_ERR, "%s: listen request ignored, %s is busy",
 		    __func__, device_get_nameunit(sc->dev));
 		goto done;
 	}
 
 	KASSERT(uld_active(sc, ULD_TOM),
 	    ("%s: TOM not initialized", __func__));
 #endif
 
 	/*
 	 * Find a running VI with IFCAP_TOE (4 or 6).  We'll use the first
 	 * such VI's queues to send the passive open and receive the reply to
 	 * it.
 	 *
 	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
 	 * then reject any attempt to bring down such a port (and maybe reject
 	 * attempts to disable IFCAP_TOE on that port too?).
 	 */
 	for_each_port(sc, i) {
 		pi = sc->port[i];
 		for_each_vi(pi, v, vi) {
 			if (vi->ifp->if_drv_flags & IFF_DRV_RUNNING &&
 			    vi->ifp->if_capenable & IFCAP_TOE)
 				goto found;
 		}
 	}
 	goto done;	/* no port that's UP with IFCAP_TOE enabled */
 found:
 
 	if (listen_hash_find(sc, inp) != NULL)
 		goto done;	/* already setup */
 
 	lctx = alloc_lctx(sc, inp, vi);
 	if (lctx == NULL) {
 		log(LOG_ERR,
 		    "%s: listen request ignored, %s couldn't allocate lctx\n",
 		    __func__, device_get_nameunit(sc->dev));
 		goto done;
 	}
 	listen_hash_add(sc, lctx);
 
 	CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
 	    __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
 	    inp->inp_vflag);
 
 	if (inp->inp_vflag & INP_IPV6)
 		rc = create_server6(sc, lctx);
 	else
 		rc = create_server(sc, lctx);
 	if (rc != 0) {
 		log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
 		    __func__, device_get_nameunit(sc->dev), rc);
 		/* Undo the hash insertion and drop the only lctx reference. */
 		(void) listen_hash_del(sc, inp);
 		inp = release_lctx(sc, lctx);
 		/* can't be freed, host stack has a reference */
 		KASSERT(inp != NULL, ("%s: inp freed", __func__));
 		goto done;
 	}
 	/* Cleared by do_pass_open_rpl when the reply arrives. */
 	lctx->flags |= LCTX_RPL_PENDING;
 done:
 #if 0
 	ADAPTER_UNLOCK(sc);
 #endif
 	return (0);
 }
 
 /*
  * Stop the hardware listener for tp's inp.  The caller must hold the inp's
  * write lock (asserted).
  *
  * The inp's lctx is removed from the listen hash first.  Returns ENOENT if
  * there was none, EINPROGRESS if the reply to the passive open is still
  * outstanding (nothing else is done in that case), and 0 after resets have
  * been sent for the synq entries that have an L2 table entry and the close
  * request has been sent to the hardware.
  */
 int
 t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
 {
 	struct listen_ctx *lctx;
 	struct adapter *sc = tod->tod_softc;
 	struct inpcb *inp = tp->t_inpcb;
 	struct synq_entry *synqe;
 
 	INP_WLOCK_ASSERT(inp);
 
 	lctx = listen_hash_del(sc, inp);
 	if (lctx == NULL)
 		return (ENOENT);	/* no hardware listener for this inp */
 
 	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
 	    lctx, lctx->flags);
 
 	/*
 	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
 	 * arrive and clean up when it does.
 	 */
 	if (lctx->flags & LCTX_RPL_PENDING) {
 		KASSERT(TAILQ_EMPTY(&lctx->synq),
 		    ("%s: synq not empty.", __func__));
 		return (EINPROGRESS);
 	}
 
 	/*
 	 * The host stack will abort all the connections on the listening
 	 * socket's so_comp.  It doesn't know about the connections on the synq
 	 * so we need to take care of those.
 	 */
 	TAILQ_FOREACH(synqe, &lctx->synq, link) {
 		if (synqe->flags & TPF_SYNQE_HAS_L2TE)
 			send_reset_synqe(tod, synqe);
 	}
 
 	destroy_server(sc, lctx);
 	return (0);
 }
 
 /*
  * Take an additional reference on synqe.  Paired with release_synqe.
  */
 static inline void
 hold_synqe(struct synq_entry *synqe)
 {
 
 	refcount_acquire(&synqe->refcnt);
 }
 
 /*
  * Drop one reference on synqe.  When the last reference goes away the SYN
  * mbuf is freed, and the synqe itself is freed too if it was allocated
  * separately (TPF_SYNQE_NEEDFREE).
  */
 static inline void
 release_synqe(struct synq_entry *synqe)
 {
 	int needfree;
 
 	if (!refcount_release(&synqe->refcnt))
 		return;
 
 	/*
 	 * Read the flag before freeing the mbuf: a synqe without
 	 * TPF_SYNQE_NEEDFREE lives inside that mbuf (see mbuf_to_synqe).
 	 */
 	needfree = synqe->flags & TPF_SYNQE_NEEDFREE;
 	m_freem(synqe->syn);
 	if (needfree)
 		free(synqe, M_CXGBE);
 }
 
 /*
  * Take a reference on the synq entry passed in as arg.  tod is unused.
  */
 void
 t4_syncache_added(struct toedev *tod __unused, void *arg)
 {
 
 	hold_synqe((struct synq_entry *)arg);
 }
 
 /*
  * Drop a reference on the synq entry passed in as arg.  tod is unused.
  */
 void
 t4_syncache_removed(struct toedev *tod __unused, void *arg)
 {
 
 	release_synqe((struct synq_entry *)arg);
 }
 
 /* XXX */
 extern void tcp_dooptions(struct tcpopt *, u_char *, int, int);
 
 /*
  * Send the work request stored in synqe->wr, using the L2 table entry
  * recorded in the synqe.  m holds an IP (v4 or v6) header followed by a TCP
  * header; the TCP sequence number and the timestamp option value parsed from
  * it are saved in the synqe.  On T5 the sequence number is also copied into
  * the work request.
  *
  * synqe->wr is atomically read and cleared, so the work request is sent at
  * most once; EALREADY is returned if it was already gone.  m is freed on all
  * paths.  Returns 0 otherwise.
  */
 int
 t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct synq_entry *synqe = arg;
 	struct wrqe *wr;
 	struct l2t_entry *e;
 	struct tcpopt to;
 	struct ip *ip = mtod(m, struct ip *);
 	struct tcphdr *th;
 
 	wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr);
 	if (wr == NULL) {
 		m_freem(m);
 		return (EALREADY);
 	}
 
 	/* The TCP header is taken to follow a fixed-size IP or IPv6 header. */
 	if (ip->ip_v == IPVERSION)
 		th = (void *)(ip + 1);
 	else
 		th = (void *)((struct ip6_hdr *)ip + 1);
 	bzero(&to, sizeof(to));
 	tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
 	    TO_SYN);
 
 	/* save these for later */
 	synqe->iss = be32toh(th->th_seq);
 	synqe->ts = to.to_tsval;
 
 	if (is_t5(sc)) {
 		struct cpl_t5_pass_accept_rpl *rpl5 = wrtod(wr);
 
 		rpl5->iss = th->th_seq;
 	}
 
 	e = &sc->l2t->l2tab[synqe->l2e_idx];
 	t4_l2t_send(sc, wr, e);
 
 	m_freem(m);	/* don't need this any more */
 	return (0);
 }
 
 /*
  * Handler for CPL_PASS_OPEN_RPL, the reply to the request sent by
  * create_server/create_server6.  Takes the listening inp's write lock, clears
  * LCTX_RPL_PENDING and then, depending on the reply status and on whether the
  * inp has been dropped in the meantime:
  *
  *  - dropped, request failed: releases the lctx;
  *  - dropped, request succeeded: asks the hardware to close the listener;
  *  - not dropped, request failed: removes the lctx from the listen hash and
  *    releases it;
  *  - not dropped, request succeeded: nothing more to do.
  *
  * The inp lock is dropped before returning unless release_lctx reports that
  * the inp was freed.  Returns the status from the CPL in all cases.
  */
 static int
 do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
 	int stid = GET_TID(cpl);
 	unsigned int status = cpl->status;
 	struct listen_ctx *lctx = lookup_stid(sc, stid);
 	struct inpcb *inp = lctx->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PASS_OPEN_RPL,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 
 	INP_WLOCK(inp);
 
 	CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
 	    __func__, stid, status, lctx->flags);
 
 	lctx->flags &= ~LCTX_RPL_PENDING;
 
 	if (status != CPL_ERR_NONE)
 		log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);
 
 #ifdef INVARIANTS
 	/*
 	 * If the inp has been dropped (listening socket closed) then
 	 * listen_stop must have run and taken the inp out of the hash.
 	 */
 	if (inp->inp_flags & INP_DROPPED) {
 		KASSERT(listen_hash_del(sc, inp) == NULL,
 		    ("%s: inp %p still in listen hash", __func__, inp));
 	}
 #endif
 
 	/*
 	 * Listening socket is gone and the hardware listener never started:
 	 * just drop the lctx.  The inp is unlocked only if it still exists.
 	 */
 	if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
 		if (release_lctx(sc, lctx) != NULL)
 			INP_WUNLOCK(inp);
 		return (status);
 	}
 
 	/*
 	 * Listening socket stopped listening earlier and now the chip tells us
 	 * it has started the hardware listener.  Stop it; the lctx will be
 	 * released in do_close_server_rpl.
 	 */
 	if (inp->inp_flags & INP_DROPPED) {
 		destroy_server(sc, lctx);
 		INP_WUNLOCK(inp);
 		return (status);
 	}
 
 	/*
 	 * Failed to start hardware listener.  Take inp out of the hash and
 	 * release our reference on it.  An error message has been logged
 	 * already.
 	 */
 	if (status != CPL_ERR_NONE) {
 		listen_hash_del(sc, inp);
 		if (release_lctx(sc, lctx) != NULL)
 			INP_WUNLOCK(inp);
 		return (status);
 	}
 
 	/* hardware listener open for business */
 
 	INP_WUNLOCK(inp);
 	return (status);
 }
 
 /*
  * Handler for CPL_CLOSE_LISTSRV_RPL, the reply to the request sent by
  * destroy_server.  On success a reference on the lctx is released under the
  * inp's write lock, and the lock is dropped unless release_lctx reports that
  * the inp was freed.  On failure the error is logged and the lctx is left
  * untouched.  Returns the status from the CPL in both cases.
  */
 static int
 do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
 	int stid = GET_TID(cpl);
 	unsigned int status = cpl->status;
 	struct listen_ctx *lctx = lookup_stid(sc, stid);
 	struct inpcb *inp = lctx->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 
 	CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);
 
 	if (status != CPL_ERR_NONE) {
 		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
 		    __func__, status, stid);
 		return (status);
 	}
 
 	INP_WLOCK(inp);
 	inp = release_lctx(sc, lctx);
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 
 	return (status);
 }
 
 /*
  * Tear down a synq entry: unlink it from its lctx's synq, release the
  * reference it held on the lctx, remove and release its tid, release its L2
  * table entry and finally drop a reference on the synqe itself.
  *
  * Called with the listening inp write-locked (asserted).  The lock is dropped
  * in here unless release_lctx reports that the inp was freed, so the caller
  * must not unlock the inp afterwards.
  */
 static void
 done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
 {
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 	struct vi_info *vi = synqe->syn->m_pkthdr.rcvif->if_softc;
 	struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
 
 	INP_WLOCK_ASSERT(inp);
 
 	TAILQ_REMOVE(&lctx->synq, synqe, link);
 	inp = release_lctx(sc, lctx);
 	if (inp)
 		INP_WUNLOCK(inp);
 	remove_tid(sc, synqe->tid);
 	release_tid(sc, synqe->tid, &sc->sge.ctrlq[vi->pi->port_id]);
 	t4_l2t_release(e);
 	release_synqe(synqe);	/* removed from synq list */
 }
 
 /*
  * Handle a CPL_ABORT_REQ_RSS whose tid refers to a synq entry.  Negative
  * advice is ignored.  Otherwise a CPL_ABORT_RPL (CPL_ABORT_NO_RST) is always
  * sent back, and the synqe is torn down here unless an abort initiated by the
  * driver is already in progress (TPF_ABORT_SHUTDOWN).  Always returns 0.
  */
 int
 do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(sc, tid);
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 	int txqid;
 	struct sge_wrq *ofld_txq;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_REQ_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
 	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
 
 	if (negative_advice(cpl->status))
 		return (0);	/* Ignore negative advice */
 
 	INP_WLOCK(inp);
 
 	/* Look up the tx queue now; done_with_synqe may free the synqe. */
 	get_qids_from_mbuf(synqe->syn, &txqid, NULL);
 	ofld_txq = &sc->sge.ofld_txq[txqid];
 
 	/*
 	 * If we'd initiated an abort earlier the reply to it is responsible for
 	 * cleaning up resources.  Otherwise we tear everything down right here
 	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
 	 */
 	if (synqe->flags & TPF_ABORT_SHUTDOWN) {
 		INP_WUNLOCK(inp);
 		goto done;
 	}
 
 	done_with_synqe(sc, synqe);
 	/* inp lock released by done_with_synqe */
 done:
 	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
 	return (0);
 }
 
 /*
  * Handle a CPL_ABORT_RPL_RSS whose tid refers to a synq entry.  This is the
  * reply to an abort the driver initiated (TPF_ABORT_SHUTDOWN is asserted), so
  * the synqe is torn down here.  Always returns 0.
  */
 int
 do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(sc, tid);
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_ABORT_RPL_RSS,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
 	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
 
 	INP_WLOCK(inp);
 	KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
 	    ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
 	    __func__, synqe, synqe->flags));
 
 	done_with_synqe(sc, synqe);
 	/* inp lock released by done_with_synqe */
 
 	return (0);
 }
 
 /*
  * Attach the newly created socket so to the toepcb that goes with the synq
  * entry passed in as arg.  The SYN mbuf of the synqe is read as a
  * cpl_pass_establish with a toepcb pointer stored right after it.  The
  * connection is marked established using the sequence numbers and TCP options
  * from that CPL, the tid is repointed from the synqe to the toepcb, and the
  * synqe is flagged TPF_SYNQE_EXPANDED.
  *
  * The caller must hold the tcbinfo read lock and the new socket's inp write
  * lock (both asserted).
  */
 void
 t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
 {
 	struct adapter *sc = tod->tod_softc;
 	struct synq_entry *synqe = arg;
 #ifdef INVARIANTS
 	struct inpcb *inp = sotoinpcb(so);
 #endif
 	struct cpl_pass_establish *cpl = mtod(synqe->syn, void *);
 	struct toepcb *toep = *(struct toepcb **)(cpl + 1);
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
 	INP_WLOCK_ASSERT(inp);
 	KASSERT(synqe->flags & TPF_SYNQE,
 	    ("%s: %p not a synq_entry?", __func__, arg));
 
 	offload_socket(so, toep);
 	make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
 	toep->flags |= TPF_CPL_PENDING;
 	update_tid(sc, synqe->tid, toep);
 	synqe->flags |= TPF_SYNQE_EXPANDED;
 }
 
 /*
  * Pick one of vi's offload tx queues and one of its offload rx queues at
  * random and stash both indices in the mbuf's flowid: tx queue in the upper
  * 16 bits, rx queue in the lower 16.  get_qids_from_mbuf reverses this.
  */
 static inline void
 save_qids_in_mbuf(struct mbuf *m, struct vi_info *vi)
 {
 	uint32_t tx, rx;
 
 	tx = vi->first_ofld_txq + (arc4random() % vi->nofldtxq);
 	rx = vi->first_ofld_rxq + (arc4random() % vi->nofldrxq);
 
 	m->m_pkthdr.flowid = (rx & 0xffff) | (tx << 16);
 }
 
 static inline void
 get_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid)
 {
 
 	if (txqid)
 		*txqid = m->m_pkthdr.flowid >> 16;
 	if (rxqid)
 		*rxqid = m->m_pkthdr.flowid & 0xffff;
 }
 
 /*
  * Get memory for a synq_entry to go with the mbuf in which the
  * PASS_ACCEPT_REQ arrived.  The entry is placed in the trailing space of the
  * mbuf when it fits there; otherwise it is malloc'ed and flagged
  * TPF_SYNQE_NEEDFREE.  Only the flags field is initialized.  Returns NULL if
  * the malloc fails.
  */
 static struct synq_entry *
 mbuf_to_synqe(struct mbuf *m)
 {
 	const int need = roundup2(sizeof (struct synq_entry), 8);
 	const int avail = M_TRAILINGSPACE(m);
 	struct synq_entry *synqe;
 
 	if (avail >= need) {
 		/* Use the last 'need' bytes of the mbuf's trailing space. */
 		synqe = (void *)(m->m_data + m->m_len + avail - need);
 		synqe->flags = TPF_SYNQE;
 		return (synqe);
 	}
 
 	synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT);
 	if (synqe != NULL)
 		synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE;
 
 	return (synqe);
 }
 
 /*
  * Translate the TCP options reported by the chip (t4opt) into the host
  * stack's struct tcpopt.  *to is zeroed first; a flag is then set for each
  * option that is non-zero in t4opt, along with the MSS (converted from
  * big-endian) and the window scale where present.
  */
 static void
 t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
 {
 
 	memset(to, 0, sizeof(*to));
 
 	if (t4opt->mss != 0) {
 		to->to_mss = be16toh(t4opt->mss);
 		to->to_flags |= TOF_MSS;
 	}
 	if (t4opt->wsf != 0) {
 		to->to_wscale = t4opt->wsf;
 		to->to_flags |= TOF_SCALE;
 	}
 	if (t4opt->tstamp != 0)
 		to->to_flags |= TOF_TS;
 	if (t4opt->sack != 0)
 		to->to_flags |= TOF_SACKPERM;
 }
 
 /*
  * Options2 for passive open.
  *
  * Builds the opt2 word from the tx modulation queue of pi's channel, the
  * offload rx queue rxqid, the TCP options seen in the SYN (tcpopt), the ECN
  * bits in the SYN's TCP header (th) and the adapter's rx coalescing setting.
  * ulp_mode is only looked at when USE_DDP_RX_FLOW_CONTROL is defined.  The
  * result is returned in big-endian byte order.
  */
 static uint32_t
 calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid,
     const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode)
 {
 	struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid];
 	uint32_t opt2;
 
 	opt2 = V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]) |
 	    F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id);
 
 	/* Timestamps, SACK and window scaling all hinge on tcp_do_rfc1323. */
 	if (V_tcp_do_rfc1323) {
 		if (tcpopt->tstamp)
 			opt2 |= F_TSTAMPS_EN;
 		if (tcpopt->sack)
 			opt2 |= F_SACK_EN;
 		if (tcpopt->wsf <= 14)
 			opt2 |= F_WND_SCALE_EN;
 	}
 
 	if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR))
 		opt2 |= F_CCTRL_ECN;
 
 	/* RX_COALESCE is always a valid value (0 or M_RX_COALESCE). */
 	if (is_t4(sc))
 		opt2 |= F_RX_COALESCE_VALID;
 	else {
 		opt2 |= F_T5_OPT_2_VALID;
 		opt2 |= F_CONG_CNTRL_VALID; /* OPT_2_ISS really, for T5 */
 	}
 	if (sc->tt.rx_coalesce)
 		opt2 |= V_RX_COALESCE(M_RX_COALESCE);
 
 #ifdef USE_DDP_RX_FLOW_CONTROL
 	if (ulp_mode == ULP_MODE_TCPDDP)
 		opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
 #endif
 
 	return htobe32(opt2);
 }
 
 /*
  * Extract connection information from the packet headers that follow the
  * cpl_pass_accept_req at the start of m.  The Ethernet and IP header lengths
  * encoded in the CPL's hdr_len are used to locate the L3 and TCP headers.
  *
  * If inc is not NULL it is zeroed and filled with the ports and addresses
  * (IPv4, or IPv6 with INC_ISIPV6 set), copied as found in the headers with
  * the packet's source as the foreign side.  If th is not NULL it receives a
  * copy of the TCP header converted by tcp_fields_to_host.
  */
 static void
 pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc,
     struct tcphdr *th)
 {
 	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
 	const struct ether_header *eh;
 	unsigned int hlen = be32toh(cpl->hdr_len);
 	uintptr_t l3hdr;
 	const struct tcphdr *tcp;
 
 	eh = (const void *)(cpl + 1);
 	l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
 	tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
 
 	if (inc) {
 		bzero(inc, sizeof(*inc));
 		inc->inc_fport = tcp->th_sport;
 		inc->inc_lport = tcp->th_dport;
 		/* Anything that is not IPv4 is treated as IPv6. */
 		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
 			const struct ip *ip = (const void *)l3hdr;
 
 			inc->inc_faddr = ip->ip_src;
 			inc->inc_laddr = ip->ip_dst;
 		} else {
 			const struct ip6_hdr *ip6 = (const void *)l3hdr;
 
 			inc->inc_flags |= INC_ISIPV6;
 			inc->inc6_faddr = ip6->ip6_src;
 			inc->inc6_laddr = ip6->ip6_dst;
 		}
 	}
 
 	if (th) {
 		bcopy(tcp, th, sizeof(*th));
 		tcp_fields_to_host(th);		/* just like tcp_input */
 	}
 }
 
 /*
  * Get an L2 table entry for reaching the peer described by inc through ifp.
  *
  * For an IPv6 connection whose local address is link-local the peer address
  * is used directly.  Otherwise a next hop lookup is done in the default FIB
  * and the address it reports is used.  Returns NULL if the lookup fails or
  * if the next hop is not reached through ifp; otherwise returns whatever
  * t4_l2t_get returns.
  */
 static struct l2t_entry *
 get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp,
     struct in_conninfo *inc)
 {
-	struct rtentry *rt;
 	struct l2t_entry *e;
 	struct sockaddr_in6 sin6;
 	struct sockaddr *dst = (void *)&sin6;
  
 	if (inc->inc_flags & INC_ISIPV6) {
+		struct nhop6_basic nh6;
+
+		bzero(dst, sizeof(struct sockaddr_in6));
 		dst->sa_len = sizeof(struct sockaddr_in6);
 		dst->sa_family = AF_INET6;
 		/*
 		 * Start out with the peer's address.  The link-local shortcut
 		 * below resolves this address as is; the routed path replaces
 		 * it with the next hop found by the lookup.
 		 */
 		((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
 
 		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
 			/* no need for route lookup */
 			e = t4_l2t_get(pi, ifp, dst);
 			return (e);
 		}
+
+		if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &inc->inc6_faddr,
+		    0, 0, 0, &nh6) != 0)
+			return (NULL);
+		if (nh6.nh_ifp != ifp)
+			return (NULL);
+		((struct sockaddr_in6 *)dst)->sin6_addr = nh6.nh_addr;
 	} else {
+		struct nhop4_basic nh4;
+
 		dst->sa_len = sizeof(struct sockaddr_in);
 		dst->sa_family = AF_INET;
-		((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
-	}
 
-	rt = rtalloc1(dst, 0, 0);
-	if (rt == NULL)
-		return (NULL);
-	else {
-		struct sockaddr *nexthop;
-
-		RT_UNLOCK(rt);
-		if (rt->rt_ifp != ifp)
-			e = NULL;
-		else {
-			if (rt->rt_flags & RTF_GATEWAY)
-				nexthop = rt->rt_gateway;
-			else
-				nexthop = dst;
-			e = t4_l2t_get(pi, ifp, nexthop);
-		}
-		RTFREE(rt);
+		if (fib4_lookup_nh_basic(RT_DEFAULT_FIB, inc->inc_faddr, 0, 0,
+		    &nh4) != 0)
+			return (NULL);
+		if (nh4.nh_ifp != ifp)
+			return (NULL);
+		((struct sockaddr_in *)dst)->sin_addr = nh4.nh_addr;
 	}
 
+	e = t4_l2t_get(pi, ifp, dst);
 	return (e);
 }
 
 #define REJECT_PASS_ACCEPT()	do { \
 	reject_reason = __LINE__; \
 	goto reject; \
 } while (0)
 
 /*
  * The context associated with a tid entry via insert_tid could be a synq_entry
  * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
  */
 CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));
 
 /*
  * Incoming SYN on a listening socket.
  *
  * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
  * etc.
  */
 static int
 do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	struct toedev *tod;
 	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
 	struct cpl_pass_accept_rpl *rpl;
 	struct wrqe *wr;
 	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
 	unsigned int tid = GET_TID(cpl);
 	struct listen_ctx *lctx = lookup_stid(sc, stid);
 	struct inpcb *inp;
 	struct socket *so;
 	struct in_conninfo inc;
 	struct tcphdr th;
 	struct tcpopt to;
 	struct port_info *pi;
 	struct vi_info *vi;
 	struct ifnet *hw_ifp, *ifp;
 	struct l2t_entry *e = NULL;
 	int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
 	struct synq_entry *synqe = NULL;
 	int reject_reason, v;
 	uint16_t vid;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 
 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
 	    lctx);
 
 	pass_accept_req_to_protohdrs(m, &inc, &th);
 	t4opt_to_tcpopt(&cpl->tcpopt, &to);
 
 	pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
 
 	/*
 	 * Use the MAC index to lookup the associated VI.  If this SYN
 	 * didn't match a perfect MAC filter, punt.
 	 */
 	if (!(be16toh(cpl->l2info) & F_SYN_XACT_MATCH)) {
 		m_freem(m);
 		m = NULL;
 		REJECT_PASS_ACCEPT();
 	}
 	for_each_vi(pi, v, vi) {
 		if (vi->xact_addr_filt == G_SYN_MAC_IDX(be16toh(cpl->l2info)))
 			goto found;
 	}
 	m_freem(m);
 	m = NULL;
 	REJECT_PASS_ACCEPT();
 
 found:
 	hw_ifp = vi->ifp;	/* the (v)cxgbeX ifnet */
 	m->m_pkthdr.rcvif = hw_ifp;
 	tod = TOEDEV(hw_ifp);
 
 	/*
 	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
 	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
 	 * doesn't match anything on this interface.
 	 *
 	 * XXX: lagg support, lagg + vlan support.
 	 */
 	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
 	if (vid != 0xfff) {
 		ifp = VLAN_DEVAT(hw_ifp, vid);
 		if (ifp == NULL)
 			REJECT_PASS_ACCEPT();
 	} else
 		ifp = hw_ifp;
 
 	/*
 	 * Don't offload if the peer requested a TCP option that's not known to
 	 * the silicon.
 	 */
 	if (cpl->tcpopt.unknown)
 		REJECT_PASS_ACCEPT();
 
 	if (inc.inc_flags & INC_ISIPV6) {
 
 		/* Don't offload if the ifcap isn't enabled */
 		if ((ifp->if_capenable & IFCAP_TOE6) == 0)
 			REJECT_PASS_ACCEPT();
 
 		/*
 		 * SYN must be directed to an IP6 address on this ifnet.  This
 		 * is more restrictive than in6_localip.
 		 */
 		if (!in6_ifhasaddr(ifp, &inc.inc6_laddr))
 			REJECT_PASS_ACCEPT();
 	} else {
 
 		/* Don't offload if the ifcap isn't enabled */
 		if ((ifp->if_capenable & IFCAP_TOE4) == 0)
 			REJECT_PASS_ACCEPT();
 
 		/*
 		 * SYN must be directed to an IP address on this ifnet.  This
 		 * is more restrictive than in_localip.
 		 */
 		if (!in_ifhasaddr(ifp, inc.inc_laddr))
 			REJECT_PASS_ACCEPT();
 	}
 
 	e = get_l2te_for_nexthop(pi, ifp, &inc);
 	if (e == NULL)
 		REJECT_PASS_ACCEPT();
 
 	synqe = mbuf_to_synqe(m);
 	if (synqe == NULL)
 		REJECT_PASS_ACCEPT();
 
 	wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
 	    sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[pi->port_id]);
 	if (wr == NULL)
 		REJECT_PASS_ACCEPT();
 	rpl = wrtod(wr);
 
 	INP_INFO_RLOCK(&V_tcbinfo);	/* for 4-tuple check */
 
 	/* Don't offload if the 4-tuple is already in use */
 	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		free(wr, M_CXGBE);
 		REJECT_PASS_ACCEPT();
 	}
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	inp = lctx->inp;		/* listening socket, not owned by TOE */
 	INP_WLOCK(inp);
 
 	/* Don't offload if the listening socket has closed */
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 		/*
 		 * The listening socket has closed.  The reply from the TOE to
 		 * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
 		 * resources tied to this listen context.
 		 */
 		INP_WUNLOCK(inp);
 		free(wr, M_CXGBE);
 		REJECT_PASS_ACCEPT();
 	}
 	so = inp->inp_socket;
 
 	mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
 	rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
 	SOCKBUF_LOCK(&so->so_rcv);
 	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
 	rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	save_qids_in_mbuf(m, vi);
 	get_qids_from_mbuf(m, NULL, &rxqid);
 
 	if (is_t4(sc))
 		INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
 	else {
 		struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;
 
 		INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
 	}
 	if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) {
 		ulp_mode = ULP_MODE_TCPDDP;
 		synqe->flags |= TPF_SYNQE_TCPDDP;
 	} else
 		ulp_mode = ULP_MODE_NONE;
 	rpl->opt0 = calc_opt0(so, vi, e, mtu_idx, rscale, rx_credits, ulp_mode);
 	rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode);
 
 	synqe->tid = tid;
 	synqe->lctx = lctx;
 	synqe->syn = m;
 	m = NULL;
 	refcount_init(&synqe->refcnt, 1);	/* 1 means extra hold */
 	synqe->l2e_idx = e->idx;
 	synqe->rcv_bufsize = rx_credits;
 	atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);
 
 	insert_tid(sc, tid, synqe);
 	TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
 	hold_synqe(synqe);	/* hold for the duration it's in the synq */
 	hold_lctx(lctx);	/* A synqe on the list has a ref on its lctx */
 
 	/*
 	 * If all goes well t4_syncache_respond will get called during
 	 * syncache_add.  Note that syncache_add releases the pcb lock.
 	 */
 	toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
 	INP_UNLOCK_ASSERT(inp);	/* ok to assert, we have a ref on the inp */
 
 	/*
 	 * If we replied during syncache_add (synqe->wr has been consumed),
 	 * good.  Otherwise, set it to 0 so that further syncache_respond
 	 * attempts by the kernel will be ignored.
 	 */
 	if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {
 
 		/*
 		 * syncache may or may not have a hold on the synqe, which may
 		 * or may not be stashed in the original SYN mbuf passed to us.
 		 * Just copy it over instead of dealing with all possibilities.
 		 */
 		m = m_dup(synqe->syn, M_NOWAIT);
 		if (m)
 			m->m_pkthdr.rcvif = hw_ifp;
 
 		remove_tid(sc, synqe->tid);
 		free(wr, M_CXGBE);
 
 		/* Yank the synqe out of the lctx synq. */
 		INP_WLOCK(inp);
 		TAILQ_REMOVE(&lctx->synq, synqe, link);
 		release_synqe(synqe);	/* removed from synq list */
 		inp = release_lctx(sc, lctx);
 		if (inp)
 			INP_WUNLOCK(inp);
 
 		release_synqe(synqe);	/* extra hold */
 		REJECT_PASS_ACCEPT();
 	}
 
 	CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK",
 	    __func__, stid, tid, lctx, synqe);
 
 	INP_WLOCK(inp);
 	synqe->flags |= TPF_SYNQE_HAS_L2TE;
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 		/*
 		 * Listening socket closed but tod_listen_stop did not abort
 		 * this tid because there was no L2T entry for the tid at that
 		 * time.  Abort it now.  The reply to the abort will clean up.
 		 */
 		CTR6(KTR_CXGBE,
 		    "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT",
 		    __func__, stid, tid, lctx, synqe, synqe->flags);
 		if (!(synqe->flags & TPF_SYNQE_EXPANDED))
 			send_reset_synqe(tod, synqe);
 		INP_WUNLOCK(inp);
 
 		release_synqe(synqe);	/* extra hold */
 		return (__LINE__);
 	}
 	INP_WUNLOCK(inp);
 
 	release_synqe(synqe);	/* extra hold */
 	return (0);
 reject:
 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
 	    reject_reason);
 
 	if (e)
 		t4_l2t_release(e);
 	release_tid(sc, tid, lctx->ctrlq);
 
 	if (__predict_true(m != NULL)) {
 		m_adj(m, sizeof(*cpl));
 		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 		m->m_pkthdr.csum_data = 0xffff;
 		hw_ifp->if_input(hw_ifp, m);
 	}
 
 	return (reject_reason);
 }
 
 static void
 synqe_to_protohdrs(struct synq_entry *synqe,
     const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
     struct tcphdr *th, struct tcpopt *to)
 {
 	uint16_t tcp_opt = be16toh(cpl->tcp_opt);
 
 	/* start off with the original SYN */
 	pass_accept_req_to_protohdrs(synqe->syn, inc, th);
 
 	/* modify parts to make it look like the ACK to our SYN|ACK */
 	th->th_flags = TH_ACK;
 	th->th_ack = synqe->iss + 1;
 	th->th_seq = be32toh(cpl->rcv_isn);
 	bzero(to, sizeof(*to));
 	if (G_TCPOPT_TSTAMP(tcp_opt)) {
 		to->to_flags |= TOF_TS;
 		to->to_tsecr = synqe->ts;
 	}
 }
 
 static int
 do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	struct vi_info *vi;
 	struct ifnet *ifp;
 	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
 #if defined(KTR) || defined(INVARIANTS)
 	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
 #endif
 	unsigned int tid = GET_TID(cpl);
 	struct synq_entry *synqe = lookup_tid(sc, tid);
 	struct listen_ctx *lctx = synqe->lctx;
 	struct inpcb *inp = lctx->inp, *new_inp;
 	struct socket *so;
 	struct tcphdr th;
 	struct tcpopt to;
 	struct in_conninfo inc;
 	struct toepcb *toep;
 	u_int txqid, rxqid;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
 
 	KASSERT(opcode == CPL_PASS_ESTABLISH,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
 	KASSERT(synqe->flags & TPF_SYNQE,
 	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
 
 	INP_INFO_RLOCK(&V_tcbinfo);	/* for syncache_expand */
 	INP_WLOCK(inp);
 
 	CTR6(KTR_CXGBE,
 	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
 	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
 
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
 
 		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
 			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
 			    ("%s: listen socket closed but tid %u not aborted.",
 			    __func__, tid));
 		}
 
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return (0);
 	}
 
 	ifp = synqe->syn->m_pkthdr.rcvif;
 	vi = ifp->if_softc;
 	KASSERT(vi->pi->adapter == sc,
 	    ("%s: vi %p, sc %p mismatch", __func__, vi, sc));
 
 	get_qids_from_mbuf(synqe->syn, &txqid, &rxqid);
 	KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
 	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__, rxqid,
 	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));
 
 	toep = alloc_toepcb(vi, txqid, rxqid, M_NOWAIT);
 	if (toep == NULL) {
 reset:
 		/*
 		 * The reply to this abort will perform final cleanup.  There is
 		 * no need to check for HAS_L2TE here.  We can be here only if
 		 * we responded to the PASS_ACCEPT_REQ, and our response had the
 		 * L2T idx.
 		 */
 		send_reset_synqe(TOEDEV(ifp), synqe);
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return (0);
 	}
 	toep->tid = tid;
 	toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
 	if (synqe->flags & TPF_SYNQE_TCPDDP)
 		set_tcpddp_ulp_mode(toep);
 	else
 		toep->ulp_mode = ULP_MODE_NONE;
 	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
 	toep->rx_credits = synqe->rcv_bufsize;
 
 	so = inp->inp_socket;
 	KASSERT(so != NULL, ("%s: socket is NULL", __func__));
 
 	/* Come up with something that syncache_expand should be ok with. */
 	synqe_to_protohdrs(synqe, cpl, &inc, &th, &to);
 
 	/*
 	 * No more need for anything in the mbuf that carried the
 	 * CPL_PASS_ACCEPT_REQ.  Drop the CPL_PASS_ESTABLISH and toep pointer
 	 * there.  XXX: bad form but I don't want to increase the size of synqe.
 	 */
 	m = synqe->syn;
 	KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len,
 	    ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len));
 	bcopy(cpl, mtod(m, void *), sizeof(*cpl));
 	*(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep;
 
 	if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
 		free_toepcb(toep);
 		goto reset;
 	}
 
 	/* New connection inpcb is already locked by syncache_expand(). */
 	new_inp = sotoinpcb(so);
 	INP_WLOCK_ASSERT(new_inp);
 
 	/*
 	 * This is for the unlikely case where the syncache entry that we added
 	 * has been evicted from the syncache, but the syncache_expand above
 	 * works because of syncookies.
 	 *
 	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
 	 * anyone accept'ing a connection before we've installed our hooks, but
 	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
 	 */
 	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
 		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
 		t4_offload_socket(TOEDEV(ifp), synqe, so);
 	}
 
 	INP_WUNLOCK(new_inp);
 
 	/* Done with the synqe */
 	TAILQ_REMOVE(&lctx->synq, synqe, link);
 	inp = release_lctx(sc, lctx);
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	release_synqe(synqe);
 
 	return (0);
 }
 
 void
 t4_init_listen_cpl_handlers(struct adapter *sc)
 {
 
 	t4_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl);
 	t4_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
 	t4_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
 	t4_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish);
 }
 #endif
Index: projects/release-pkg/sys/dev/e1000/if_em.c
===================================================================
--- projects/release-pkg/sys/dev/e1000/if_em.c	(revision 293335)
+++ projects/release-pkg/sys/dev/e1000/if_em.c	(revision 293336)
@@ -1,6038 +1,6078 @@
 /******************************************************************************
 
   Copyright (c) 2001-2015, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 #include "opt_em.h"
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #ifdef HAVE_KERNEL_OPTION_HEADERS
 #include "opt_device_polling.h"
 #endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #ifdef DDB
 #include <sys/types.h>
 #include <ddb/ddb.h>
 #endif
 #if __FreeBSD_version >= 800000
 #include <sys/buf_ring.h>
 #endif
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/rman.h>
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/eventhandler.h>
 #include <machine/bus.h>
 #include <machine/resource.h>
 
 #include <net/bpf.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_media.h>
 
 #include <net/if_types.h>
 #include <net/if_vlan_var.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <machine/in_cksum.h>
 #include <dev/led/led.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 
 #include "e1000_api.h"
 #include "e1000_82571.h"
 #include "if_em.h"
 
 /*********************************************************************
  *  Set this to one to display debug statistics
  *********************************************************************/
 int	em_display_debug_stats = 0;
 
 /*********************************************************************
  *  Driver version:
  *********************************************************************/
 char em_driver_version[] = "7.4.2";
 
 /*********************************************************************
  *  PCI Device ID Table
  *
  *  Used by probe to select devices to load on
  *  Last field stores an index into e1000_strings
  *  Last entry must be all 0s
  *
  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
  *********************************************************************/
 
 static em_vendor_info_t em_vendor_info_array[] =
 {
 	/* Intel(R) PRO/1000 Network Connection */
 	{ 0x8086, E1000_DEV_ID_82571EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_DUAL,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_SERDES_QUAD,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_COPPER_LP,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571EB_QUAD_FIBER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82571PT_QUAD_COPPER,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82572EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82572EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82572EI_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82572EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
 
 	{ 0x8086, E1000_DEV_ID_82573E,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82573E_IAMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82573L,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82583V,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_SPT,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_SPT,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_COPPER_DPT,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_80003ES2LAN_SERDES_DPT,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH8_82567V_3,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_AMT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_C,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IGP_M_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IFE,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_GT,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_IFE_G,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH9_BM,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82574L,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_82574LA,		PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_R_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_ICH10_D_BM_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_M_HV_LC,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_D_HV_DC,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH2_LV_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH2_LV_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_LM,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_LPT_I217_V,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_LM,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_LPTLP_I218_V,
 						PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM2,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_I218_V2,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_I218_LM3,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	{ 0x8086, E1000_DEV_ID_PCH_I218_V3,	PCI_ANY_ID, PCI_ANY_ID, 0},
 	/* required last entry */
 	{ 0, 0, 0, 0, 0}
 };
 
 /*********************************************************************
  *  Table of branding strings for all supported NICs.
  *********************************************************************/
 
 static char *em_strings[] = {
 	"Intel(R) PRO/1000 Network Connection"
 };
 
 /*********************************************************************
  *  Function prototypes
  *********************************************************************/
 static int	em_probe(device_t);
 static int	em_attach(device_t);
 static int	em_detach(device_t);
 static int	em_shutdown(device_t);
 static int	em_suspend(device_t);
 static int	em_resume(device_t);
 #ifdef EM_MULTIQUEUE
 static int	em_mq_start(if_t, struct mbuf *);
 static int	em_mq_start_locked(if_t,
 		    struct tx_ring *);
 static void	em_qflush(if_t);
 #else
 static void	em_start(if_t);
 static void	em_start_locked(if_t, struct tx_ring *);
 #endif
 static int	em_ioctl(if_t, u_long, caddr_t);
 static uint64_t	em_get_counter(if_t, ift_counter);
 static void	em_init(void *);
 static void	em_init_locked(struct adapter *);
 static void	em_stop(void *);
 static void	em_media_status(if_t, struct ifmediareq *);
 static int	em_media_change(if_t);
 static void	em_identify_hardware(struct adapter *);
 static int	em_allocate_pci_resources(struct adapter *);
 static int	em_allocate_legacy(struct adapter *);
 static int	em_allocate_msix(struct adapter *);
 static int	em_allocate_queues(struct adapter *);
 static int	em_setup_msix(struct adapter *);
 static void	em_free_pci_resources(struct adapter *);
 static void	em_local_timer(void *);
 static void	em_reset(struct adapter *);
 static int	em_setup_interface(device_t, struct adapter *);
 
 static void	em_setup_transmit_structures(struct adapter *);
 static void	em_initialize_transmit_unit(struct adapter *);
 static int	em_allocate_transmit_buffers(struct tx_ring *);
 static void	em_free_transmit_structures(struct adapter *);
 static void	em_free_transmit_buffers(struct tx_ring *);
 
 static int	em_setup_receive_structures(struct adapter *);
 static int	em_allocate_receive_buffers(struct rx_ring *);
 static void	em_initialize_receive_unit(struct adapter *);
 static void	em_free_receive_structures(struct adapter *);
 static void	em_free_receive_buffers(struct rx_ring *);
 
 static void	em_enable_intr(struct adapter *);
 static void	em_disable_intr(struct adapter *);
 static void	em_update_stats_counters(struct adapter *);
 static void	em_add_hw_stats(struct adapter *adapter);
 static void	em_txeof(struct tx_ring *);
 static bool	em_rxeof(struct rx_ring *, int, int *);
 #ifndef __NO_STRICT_ALIGNMENT
 static int	em_fixup_rx(struct rx_ring *);
 #endif
-static void	em_receive_checksum(struct e1000_rx_desc *, struct mbuf *);
+static void	em_setup_rxdesc(union e1000_rx_desc_extended *,
+		    const struct em_rxbuffer *rxbuf);
+static void	em_receive_checksum(uint32_t status, struct mbuf *);
 static void	em_transmit_checksum_setup(struct tx_ring *, struct mbuf *, int,
 		    struct ip *, u32 *, u32 *);
 static void	em_tso_setup(struct tx_ring *, struct mbuf *, int, struct ip *,
 		    struct tcphdr *, u32 *, u32 *);
 static void	em_set_promisc(struct adapter *);
 static void	em_disable_promisc(struct adapter *);
 static void	em_set_multi(struct adapter *);
 static void	em_update_link_status(struct adapter *);
 static void	em_refresh_mbufs(struct rx_ring *, int);
 static void	em_register_vlan(void *, if_t, u16);
 static void	em_unregister_vlan(void *, if_t, u16);
 static void	em_setup_vlan_hw_support(struct adapter *);
 static int	em_xmit(struct tx_ring *, struct mbuf **);
 static int	em_dma_malloc(struct adapter *, bus_size_t,
 		    struct em_dma_alloc *, int);
 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
 static int	em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
 static void	em_print_nvm_info(struct adapter *);
 static int	em_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
 static void	em_print_debug_info(struct adapter *);
 static int 	em_is_valid_ether_addr(u8 *);
 static int	em_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
 static void	em_add_int_delay_sysctl(struct adapter *, const char *,
 		    const char *, struct em_int_delay_info *, int, int);
 /* Management and WOL Support */
 static void	em_init_manageability(struct adapter *);
 static void	em_release_manageability(struct adapter *);
 static void     em_get_hw_control(struct adapter *);
 static void     em_release_hw_control(struct adapter *);
 static void	em_get_wakeup(device_t);
 static void     em_enable_wakeup(device_t);
 static int	em_enable_phy_wakeup(struct adapter *);
 static void	em_led_func(void *, int);
 static void	em_disable_aspm(struct adapter *);
 
 static int	em_irq_fast(void *);
 
 /* MSIX handlers */
 static void	em_msix_tx(void *);
 static void	em_msix_rx(void *);
 static void	em_msix_link(void *);
 static void	em_handle_tx(void *context, int pending);
 static void	em_handle_rx(void *context, int pending);
 static void	em_handle_link(void *context, int pending);
 
 #ifdef EM_MULTIQUEUE
 static void	em_enable_vectors_82574(struct adapter *);
 #endif
 
 static void	em_set_sysctl_value(struct adapter *, const char *,
 		    const char *, int *, int);
 static int	em_set_flowcntl(SYSCTL_HANDLER_ARGS);
 static int	em_sysctl_eee(SYSCTL_HANDLER_ARGS);
 
 static __inline void em_rx_discard(struct rx_ring *, int);
 
 #ifdef DEVICE_POLLING
 static poll_handler_t em_poll;
 #endif /* POLLING */
 
 /*********************************************************************
  *  FreeBSD Device Interface Entry Points
  *********************************************************************/
 
 static device_method_t em_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, em_probe),
 	DEVMETHOD(device_attach, em_attach),
 	DEVMETHOD(device_detach, em_detach),
 	DEVMETHOD(device_shutdown, em_shutdown),
 	DEVMETHOD(device_suspend, em_suspend),
 	DEVMETHOD(device_resume, em_resume),
 	DEVMETHOD_END
 };
 
 static driver_t em_driver = {
 	"em", em_methods, sizeof(struct adapter),
 };
 
 devclass_t em_devclass;
 DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
 MODULE_DEPEND(em, pci, 1, 1, 1);
 MODULE_DEPEND(em, ether, 1, 1, 1);
 #ifdef DEV_NETMAP
 MODULE_DEPEND(em, netmap, 1, 1, 1);
 #endif /* DEV_NETMAP */
 
 /*********************************************************************
  *  Tunable default values.
  *********************************************************************/
 
 #define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
 #define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
 #define M_TSO_LEN			66
 
 #define MAX_INTS_PER_SEC	8000
 #define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
 
 /* Allow common code without TSO */
 #ifndef CSUM_TSO
 #define CSUM_TSO	0
 #endif
 
 #define TSO_WORKAROUND	4
 
 static SYSCTL_NODE(_hw, OID_AUTO, em, CTLFLAG_RD, 0, "EM driver parameters");
 
 static int em_disable_crc_stripping = 0;
 SYSCTL_INT(_hw_em, OID_AUTO, disable_crc_stripping, CTLFLAG_RDTUN,
     &em_disable_crc_stripping, 0, "Disable CRC Stripping");
 
 static int em_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
 static int em_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
 SYSCTL_INT(_hw_em, OID_AUTO, tx_int_delay, CTLFLAG_RDTUN, &em_tx_int_delay_dflt,
     0, "Default transmit interrupt delay in usecs");
 SYSCTL_INT(_hw_em, OID_AUTO, rx_int_delay, CTLFLAG_RDTUN, &em_rx_int_delay_dflt,
     0, "Default receive interrupt delay in usecs");
 
 /*
  * Defaults for the absolute interrupt delay limits.  The EM_TADV and
  * EM_RADV constants are passed through EM_TICKS_TO_USECS() and exported
  * under hw.em with CTLFLAG_RDTUN.
  */
 static int em_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
 static int em_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
 SYSCTL_INT(_hw_em, OID_AUTO, tx_abs_int_delay, CTLFLAG_RDTUN,
     &em_tx_abs_int_delay_dflt, 0,
     "Default transmit interrupt delay limit in usecs");
 SYSCTL_INT(_hw_em, OID_AUTO, rx_abs_int_delay, CTLFLAG_RDTUN,
     &em_rx_abs_int_delay_dflt, 0,
     "Default receive interrupt delay limit in usecs");
 
 /*
  * Requested descriptor ring sizes.  em_attach() validates these against
  * EM_MIN_xXD/EM_MAX_xXD and the EM_DBA_ALIGN alignment, and falls back
  * to the EM_DEFAULT_xXD values when they are not acceptable.
  */
 static int em_rxd = EM_DEFAULT_RXD;
 static int em_txd = EM_DEFAULT_TXD;
 SYSCTL_INT(_hw_em, OID_AUTO, rxd, CTLFLAG_RDTUN, &em_rxd, 0,
     "Number of receive descriptors per queue");
 SYSCTL_INT(_hw_em, OID_AUTO, txd, CTLFLAG_RDTUN, &em_txd, 0,
     "Number of transmit descriptors per queue");
 
 /* Smart power down tunable, off (FALSE) by default */
 static int em_smart_pwr_down = FALSE;
 SYSCTL_INT(_hw_em, OID_AUTO, smart_pwr_down, CTLFLAG_RDTUN, &em_smart_pwr_down,
     0, "Set to true to leave smart power down enabled on newer adapters");
 
 /* Controls whether promiscuous also shows bad packets */
 static int em_debug_sbp = FALSE;
 SYSCTL_INT(_hw_em, OID_AUTO, sbp, CTLFLAG_RDTUN, &em_debug_sbp, 0,
     "Show bad packets in promiscuous mode");
 
 /* MSI-X is requested by default (TRUE) */
 static int em_enable_msix = TRUE;
 SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0,
     "Enable MSI-X interrupts");
 
 #ifdef EM_MULTIQUEUE
 /* Queue count tunable, only compiled in for EM_MULTIQUEUE builds */
 static int em_num_queues = 1;
 SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0,
     "82574 only: Number of queues to configure, 0 indicates autoconfigure");
 #endif
 
 /*
 ** Global variable to store the last used CPU when binding queues
 ** to CPUs.  It is initialized to -1 here.
 ** NOTE(review): the previous wording referred to igb_allocate_msix and
 ** a CPU_FIRST starting value; presumably the consumer in this driver is
 ** em_allocate_msix -- confirm against that function.
 */
 static int em_last_bind_cpu = -1;
 
 /* How many packets rxeof tries to clean at a time */
 static int em_rx_process_limit = 100;
 SYSCTL_INT(_hw_em, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
     &em_rx_process_limit, 0,
     "Maximum number of received packets to process "
     "at a time, -1 means unlimited");
 
 /*
  * Energy Efficient Ethernet tunable.  em_attach() copies this value
  * into hw->dev_spec.ich8lan.eee_disable, so the default of 1 means
  * EEE is disabled (OFF) by default.
  * NOTE(review): the sysctl description below says "Enable" although a
  * non-zero value is stored into an eee_disable field.
  */
 static int eee_setting = 1;
 SYSCTL_INT(_hw_em, OID_AUTO, eee_setting, CTLFLAG_RDTUN, &eee_setting, 0,
     "Enable Energy Efficient Ethernet");
 
 /* Global used in WOL setup with multiport cards */
 static int global_quad_port_a = 0;
 
 #ifdef DEV_NETMAP	/* see ixgbe.c for details */
 #include <dev/netmap/if_em_netmap.h>
 #endif /* DEV_NETMAP */
 
 /*********************************************************************
  *  Device identification routine
  *
  *  em_probe decides whether this driver should claim the adapter by
  *  comparing its PCI vendor/device/subvendor/subdevice ids with the
  *  entries of em_vendor_info_array.  A table entry whose subvendor
  *  or subdevice id is PCI_ANY_ID matches any value in that field.
  *
  *  On a match the device description is set to the adapter string
  *  followed by the driver version.
  *
  *  return BUS_PROBE_DEFAULT on a match, ENXIO otherwise
  *********************************************************************/
 
 static int
 em_probe(device_t dev)
 {
 	char		adapter_name[60];
 	uint16_t	pci_vendor_id = 0;
 	uint16_t	pci_device_id = 0;
 	uint16_t	pci_subvendor_id = 0;
 	uint16_t	pci_subdevice_id = 0;
 	em_vendor_info_t *ent;
 
 	INIT_DEBUGOUT("em_probe: begin");
 
 	/* Cheap rejection before reading the remaining ids */
 	pci_vendor_id = pci_get_vendor(dev);
 	if (pci_vendor_id != EM_VENDOR_ID)
 		return (ENXIO);
 
 	pci_device_id = pci_get_device(dev);
 	pci_subvendor_id = pci_get_subvendor(dev);
 	pci_subdevice_id = pci_get_subdevice(dev);
 
 	/* The table is terminated by an entry with vendor_id == 0 */
 	ent = em_vendor_info_array;
 	while (ent->vendor_id != 0) {
 		if ((pci_vendor_id == ent->vendor_id) &&
 		    (pci_device_id == ent->device_id) &&
 
 		    ((pci_subvendor_id == ent->subvendor_id) ||
 		    (ent->subvendor_id == PCI_ANY_ID)) &&
 
 		    ((pci_subdevice_id == ent->subdevice_id) ||
 		    (ent->subdevice_id == PCI_ANY_ID))) {
 			/*
 			 * Bound the write: the description strings are
 			 * defined elsewhere and must never overrun the
 			 * fixed-size stack buffer.
 			 */
 			snprintf(adapter_name, sizeof(adapter_name), "%s %s",
 				em_strings[ent->index],
 				em_driver_version);
 			device_set_desc_copy(dev, adapter_name);
 			return (BUS_PROBE_DEFAULT);
 		}
 		ent++;
 	}
 
 	return (ENXIO);
 }
 
 /*********************************************************************
  *  Device initialization routine
  *
  *  The attach entry point is called when the driver is being loaded.
  *  This routine identifies the type of hardware, allocates all resources
  *  and initializes the hardware.
  *
  *  Failures after the queues have been allocated unwind through
  *  err_late; earlier failures unwind through err_pci only.
  *
  *  return 0 on success, positive errno value on failure
  *********************************************************************/
 
 static int
 em_attach(device_t dev)
 {
 	struct adapter	*adapter;
 	struct e1000_hw	*hw;
 	int		error = 0;
 
 	INIT_DEBUGOUT("em_attach: begin");
 
 	if (resource_disabled("em", device_get_unit(dev))) {
 		device_printf(dev, "Disabled by device hint\n");
 		return (ENXIO);
 	}
 
 	adapter = device_get_softc(dev);
 	adapter->dev = adapter->osdep.dev = dev;
 	hw = &adapter->hw;
 	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
 
 	/* SYSCTL stuff */
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
 	    em_sysctl_nvm_info, "I", "NVM Information");
 
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
 	    em_sysctl_debug_info, "I", "Debug Information");
 
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
 	    em_set_flowcntl, "I", "Flow Control");
 
 	/* The timer callout is tied to the core mutex */
 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
 
 	/* Determine hardware and mac info */
 	em_identify_hardware(adapter);
 
 	/* Setup PCI resources */
 	if (em_allocate_pci_resources(adapter)) {
 		device_printf(dev, "Allocation of PCI resources failed\n");
 		error = ENXIO;
 		goto err_pci;
 	}
 
 	/*
 	** For ICH8 and family we need to
 	** map the flash memory, and this
 	** must happen after the MAC is
 	** identified
 	*/
 	if ((hw->mac.type == e1000_ich8lan) ||
 	    (hw->mac.type == e1000_ich9lan) ||
 	    (hw->mac.type == e1000_ich10lan) ||
 	    (hw->mac.type == e1000_pchlan) ||
 	    (hw->mac.type == e1000_pch2lan) ||
 	    (hw->mac.type == e1000_pch_lpt)) {
 		int rid = EM_BAR_TYPE_FLASH;
 		adapter->flash = bus_alloc_resource_any(dev,
 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
 		if (adapter->flash == NULL) {
 			device_printf(dev, "Mapping of Flash failed\n");
 			error = ENXIO;
 			goto err_pci;
 		}
 		/* This is used in the shared code */
 		hw->flash_address = (u8 *)adapter->flash;
 		adapter->osdep.flash_bus_space_tag =
 		    rman_get_bustag(adapter->flash);
 		adapter->osdep.flash_bus_space_handle =
 		    rman_get_bushandle(adapter->flash);
 	}
 
 	/* Do Shared Code initialization */
 	if (e1000_setup_init_funcs(hw, TRUE)) {
 		device_printf(dev, "Setup of Shared code failed\n");
 		error = ENXIO;
 		goto err_pci;
 	}
 
 	/*
 	 * Setup MSI/X or MSI if PCI Express
 	 */
 	adapter->msix = em_setup_msix(adapter);
 
 	e1000_get_bus_info(hw);
 
 	/* Set up some sysctls for the tunable interrupt delays */
 	em_add_int_delay_sysctl(adapter, "rx_int_delay",
 	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
 	    E1000_REGISTER(hw, E1000_RDTR), em_rx_int_delay_dflt);
 	em_add_int_delay_sysctl(adapter, "tx_int_delay",
 	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
 	    E1000_REGISTER(hw, E1000_TIDV), em_tx_int_delay_dflt);
 	em_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
 	    "receive interrupt delay limit in usecs",
 	    &adapter->rx_abs_int_delay,
 	    E1000_REGISTER(hw, E1000_RADV),
 	    em_rx_abs_int_delay_dflt);
 	em_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
 	    "transmit interrupt delay limit in usecs",
 	    &adapter->tx_abs_int_delay,
 	    E1000_REGISTER(hw, E1000_TADV),
 	    em_tx_abs_int_delay_dflt);
 	em_add_int_delay_sysctl(adapter, "itr",
 	    "interrupt delay limit in usecs/4",
 	    &adapter->tx_itr,
 	    E1000_REGISTER(hw, E1000_ITR),
 	    DEFAULT_ITR);
 
 	/* Sysctl for limiting the amount of work done in the taskqueue */
 	em_set_sysctl_value(adapter, "rx_processing_limit",
 	    "max number of rx packets to process", &adapter->rx_process_limit,
 	    em_rx_process_limit);
 
 	/*
 	 * Validate number of transmit and receive descriptors. It
 	 * must not exceed hardware maximum, and must be multiple
 	 * of E1000_DBA_ALIGN.  Invalid tunables fall back to the
 	 * compiled-in defaults instead of failing the attach.
 	 */
 	if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
 	    (em_txd > EM_MAX_TXD) || (em_txd < EM_MIN_TXD)) {
 		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
 		    EM_DEFAULT_TXD, em_txd);
 		adapter->num_tx_desc = EM_DEFAULT_TXD;
 	} else
 		adapter->num_tx_desc = em_txd;
 
-	if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
+	if (((em_rxd * sizeof(union e1000_rx_desc_extended)) % EM_DBA_ALIGN) != 0 ||
 	    (em_rxd > EM_MAX_RXD) || (em_rxd < EM_MIN_RXD)) {
 		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
 		    EM_DEFAULT_RXD, em_rxd);
 		adapter->num_rx_desc = EM_DEFAULT_RXD;
 	} else
 		adapter->num_rx_desc = em_rxd;
 
 	hw->mac.autoneg = DO_AUTO_NEG;
 	hw->phy.autoneg_wait_to_complete = FALSE;
 	hw->phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
 
 	/* Copper options */
 	if (hw->phy.media_type == e1000_media_type_copper) {
 		hw->phy.mdix = AUTO_ALL_MODES;
 		hw->phy.disable_polarity_correction = FALSE;
 		hw->phy.ms_type = EM_MASTER_SLAVE;
 	}
 
 	/*
 	 * Set the frame limits assuming
 	 * standard ethernet sized frames.
 	 */
 	adapter->hw.mac.max_frame_size =
 	    ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
 
 	/*
 	 * This controls when hardware reports transmit completion
 	 * status.
 	 */
 	hw->mac.report_tx_early = 1;
 
 	/*
 	** Get queue/ring memory
 	*/
 	if (em_allocate_queues(adapter)) {
 		error = ENOMEM;
 		goto err_pci;
 	}
 
 	/* Allocate multicast array memory. */
 	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
 	if (adapter->mta == NULL) {
 		device_printf(dev, "Can not allocate multicast setup array\n");
 		error = ENOMEM;
 		goto err_late;
 	}
 
 	/* Check SOL/IDER usage */
 	if (e1000_check_reset_block(hw))
 		device_printf(dev, "PHY reset is blocked"
 		    " due to SOL/IDER session.\n");
 
 	/*
 	 * Sysctl for setting Energy Efficient Ethernet.  The tunable is
 	 * stored into the eee_disable field, i.e. non-zero disables EEE.
 	 */
 	hw->dev_spec.ich8lan.eee_disable = eee_setting;
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
 	    OID_AUTO, "eee_control", CTLTYPE_INT|CTLFLAG_RW,
 	    adapter, 0, em_sysctl_eee, "I",
 	    "Disable Energy Efficient Ethernet");
 
 	/*
 	** Start from a known state, this is
 	** important in reading the nvm and
 	** mac from that.
 	*/
 	e1000_reset_hw(hw);
 
 
 	/* Make sure we have a good EEPROM before we read from it */
 	if (e1000_validate_nvm_checksum(hw) < 0) {
 		/*
 		** Some PCI-E parts fail the first check due to
 		** the link being in sleep state, call it again,
 		** if it fails a second time its a real issue.
 		*/
 		if (e1000_validate_nvm_checksum(hw) < 0) {
 			device_printf(dev,
 			    "The EEPROM Checksum Is Not Valid\n");
 			error = EIO;
 			goto err_late;
 		}
 	}
 
 	/* Copy the permanent MAC address out of the EEPROM */
 	if (e1000_read_mac_addr(hw) < 0) {
 		device_printf(dev, "EEPROM read error while reading MAC"
 		    " address\n");
 		error = EIO;
 		goto err_late;
 	}
 
 	if (!em_is_valid_ether_addr(hw->mac.addr)) {
 		device_printf(dev, "Invalid MAC address\n");
 		error = EIO;
 		goto err_late;
 	}
 
 	/* Disable ULP support */
 	e1000_disable_ulp_lpt_lp(hw, TRUE);
 
 	/*
 	**  Do interrupt configuration
 	*/
 	if (adapter->msix > 1) /* Do MSIX */
 		error = em_allocate_msix(adapter);
 	else  /* MSI or Legacy */
 		error = em_allocate_legacy(adapter);
 	if (error)
 		goto err_late;
 
 	/*
 	 * Get Wake-on-Lan and Management info for later use
 	 */
 	em_get_wakeup(dev);
 
 	/*
 	 * Setup OS specific network interface.  'error' is still 0 at
 	 * this point, so it must be set explicitly: otherwise attach
 	 * would free everything below and still report success.
 	 */
 	if (em_setup_interface(dev, adapter) != 0) {
 		error = ENXIO;
 		goto err_late;
 	}
 
 	em_reset(adapter);
 
 	/* Initialize statistics */
 	em_update_stats_counters(adapter);
 
 	hw->mac.get_link_status = 1;
 	em_update_link_status(adapter);
 
 	/* Register for VLAN events */
 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 	    em_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 	    em_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 
 	em_add_hw_stats(adapter);
 
 	/* Non-AMT based hardware can now take control from firmware */
 	if (adapter->has_manage && !adapter->has_amt)
 		em_get_hw_control(adapter);
 
 	/* Tell the stack that the interface is not active */
 	if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
 
 	adapter->led_dev = led_create(em_led_func, adapter,
 	    device_get_nameunit(dev));
 #ifdef DEV_NETMAP
 	em_netmap_attach(adapter);
 #endif /* DEV_NETMAP */
 
 	INIT_DEBUGOUT("em_attach: end");
 
 	return (0);
 
 	/* Unwind: queue/ring state first, then PCI resources and the lock */
 err_late:
 	em_free_transmit_structures(adapter);
 	em_free_receive_structures(adapter);
 	em_release_hw_control(adapter);
 	if (adapter->ifp != (void *)NULL)
 		if_free(adapter->ifp);
 err_pci:
 	em_free_pci_resources(adapter);
 	free(adapter->mta, M_DEVBUF);
 	EM_CORE_LOCK_DESTROY(adapter);
 
 	return (error);
 }
 
 /*********************************************************************
  *  Device removal routine
  *
  *  The detach entry point is called when the driver is being removed.
  *  This routine stops the adapter and deallocates all the resources
  *  that were allocated for driver operation.
  *
  *  The core mutex is destroyed last: the timer callout was set up
  *  with callout_init_mtx() on that mutex, so it has to outlive
  *  callout_drain() and the rest of the teardown.
  *
  *  return 0 on success, EBUSY while a VLAN trunk still uses the
  *  interface
  *********************************************************************/
 
 static int
 em_detach(device_t dev)
 {
 	struct adapter	*adapter = device_get_softc(dev);
 	if_t ifp = adapter->ifp;
 
 	INIT_DEBUGOUT("em_detach: begin");
 
 	/* Make sure VLANS are not using driver */
 	if (if_vlantrunkinuse(ifp)) {
 		device_printf(dev,"Vlan in use, detach first\n");
 		return (EBUSY);
 	}
 
 #ifdef DEVICE_POLLING
 	if (if_getcapenable(ifp) & IFCAP_POLLING)
 		ether_poll_deregister(ifp);
 #endif
 
 	if (adapter->led_dev != NULL)
 		led_destroy(adapter->led_dev);
 
 	/* Flag the detach (em_ioctl checks in_detach) and stop the adapter */
 	EM_CORE_LOCK(adapter);
 	adapter->in_detach = 1;
 	em_stop(adapter);
 	EM_CORE_UNLOCK(adapter);
 
 	e1000_phy_hw_reset(&adapter->hw);
 
 	em_release_manageability(adapter);
 	em_release_hw_control(adapter);
 
 	/* Unregister VLAN events */
 	if (adapter->vlan_attach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
 	if (adapter->vlan_detach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
 
 	ether_ifdetach(adapter->ifp);
 	callout_drain(&adapter->timer);
 
 #ifdef DEV_NETMAP
 	netmap_detach(ifp);
 #endif /* DEV_NETMAP */
 
 	em_free_pci_resources(adapter);
 	bus_generic_detach(dev);
 	if_free(ifp);
 
 	em_free_transmit_structures(adapter);
 	em_free_receive_structures(adapter);
 
 	/*
 	 * Hardware control was already released above, while the device
 	 * resources were still allocated; it is not released a second
 	 * time after em_free_pci_resources().
 	 */
 	free(adapter->mta, M_DEVBUF);
 
 	/* Nothing can take the core lock any more */
 	EM_CORE_LOCK_DESTROY(adapter);
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Shutdown entry point
  *
  *  Shutdown is handled exactly like a suspend: the call is forwarded
  *  to em_suspend() and its result is returned unchanged.
  *
  **********************************************************************/
 
 static int
 em_shutdown(device_t dev)
 {
 	int	rc;
 
 	rc = em_suspend(dev);
 
 	return (rc);
 }
 
 /*
  * Suspend device method (also reached through em_shutdown).
  *
  * With the core lock held, manageability and hardware control are
  * released and wakeup is enabled; the result of bus_generic_suspend()
  * is then returned.
  */
 static int
 em_suspend(device_t dev)
 {
 	struct adapter	*adapter;
 	int		rc;
 
 	adapter = device_get_softc(dev);
 
 	EM_CORE_LOCK(adapter);
 	em_release_manageability(adapter);
 	em_release_hw_control(adapter);
 	em_enable_wakeup(dev);
 	EM_CORE_UNLOCK(adapter);
 
 	rc = bus_generic_suspend(dev);
 
 	return (rc);
 }
 
 /*
  * Resume device method.
  *
  * Re-initializes the adapter under the core lock (applying the pchlan
  * resume workarounds on e1000_pch2lan parts first) and, if the
  * interface is up, running and has link, restarts transmission on
  * every queue that still has packets pending.
  *
  * Returns the result of bus_generic_resume().
  */
 static int
 em_resume(device_t dev)
 {
 	struct adapter	*adapter = device_get_softc(dev);
 	struct tx_ring	*rings = adapter->tx_rings;
 	struct tx_ring	*txr;
 	if_t		ifp = adapter->ifp;
 	int		q;
 
 	EM_CORE_LOCK(adapter);
 
 	if (adapter->hw.mac.type == e1000_pch2lan)
 		e1000_resume_workarounds_pchlan(&adapter->hw);
 
 	em_init_locked(adapter);
 	em_init_manageability(adapter);
 
 	if ((if_getflags(ifp) & IFF_UP) &&
 	    (if_getdrvflags(ifp) & IFF_DRV_RUNNING) &&
 	    adapter->link_active) {
 		/* Kick each transmit queue that has work waiting */
 		for (q = 0; q < adapter->num_queues; q++) {
 			txr = &rings[q];
 
 			EM_TX_LOCK(txr);
 #ifdef EM_MULTIQUEUE
 			if (!drbr_empty(ifp, txr->br))
 				em_mq_start_locked(ifp, txr);
 #else
 			if (!if_sendq_empty(ifp))
 				em_start_locked(ifp, txr);
 #endif
 			EM_TX_UNLOCK(txr);
 		}
 	}
 
 	EM_CORE_UNLOCK(adapter);
 
 	return (bus_generic_resume(dev));
 }
 
 
 #ifndef EM_MULTIQUEUE
 /*
  * Legacy (single queue) transmit start; the caller holds the TX lock.
  *
  * Drains the interface send queue into the transmit ring until the
  * queue is empty, the ring runs short of descriptors (IFF_DRV_OACTIVE
  * is then set) or em_xmit() fails.
  */
 static void
 em_start_locked(if_t ifp, struct tx_ring *txr)
 {
 	struct adapter	*adapter = if_getsoftc(ifp);
 	struct mbuf	*m_head;
 	int		drvflags;
 
 	EM_TX_LOCK_ASSERT(txr);
 
 	/* Only transmit when running, not flagged OACTIVE, and link is up */
 	drvflags = if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE);
 	if (drvflags != IFF_DRV_RUNNING || !adapter->link_active)
 		return;
 
 	for (;;) {
 		if (if_sendq_empty(ifp))
 			break;
 
 		/* Call cleanup if number of TX descriptors low */
 		if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD)
 			em_txeof(txr);
 
 		/* Still too few descriptors: mark the interface OACTIVE */
 		if (txr->tx_avail < EM_MAX_SCATTER) {
 			if_setdrvflagbits(ifp,IFF_DRV_OACTIVE, 0);
 			break;
 		}
 
 		m_head = if_dequeue(ifp);
 		if (m_head == NULL)
 			break;
 
 		/*
 		 *  Encapsulation can modify our pointer, and or make it
 		 *  NULL on failure.  Only a surviving mbuf can be put
 		 *  back at the head of the send queue.
 		 */
 		if (em_xmit(txr, &m_head)) {
 			if (m_head != NULL)
 				if_sendq_prepend(ifp, m_head);
 			break;
 		}
 
 		/* Mark the queue as having work */
 		if (txr->busy == EM_TX_IDLE)
 			txr->busy = EM_TX_BUSY;
 
 		/* Send a copy of the frame to the BPF listener */
 		ETHER_BPF_MTAP(ifp, m_head);
 	}
 }
 
 /*
  * Stack-facing transmit start for the legacy (single queue) path.
  * Does nothing unless the interface is running; otherwise runs
  * em_start_locked() on the first transmit ring under its TX lock.
  */
 static void
 em_start(if_t ifp)
 {
 	struct adapter	*adapter = if_getsoftc(ifp);
 	struct tx_ring	*txr = adapter->tx_rings;
 
 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
 		return;
 
 	EM_TX_LOCK(txr);
 	em_start_locked(ifp, txr);
 	EM_TX_UNLOCK(txr);
 }
 #else /* EM_MULTIQUEUE */
 /*********************************************************************
  *  Multiqueue Transmit routines 
  *
  *  em_mq_start is called by the stack to initiate a transmit.
  *  however, if busy the driver can queue the request rather
  *  than do an immediate send. It is this that is an advantage
  *  in this driver, rather than also having multiple tx queues.
  **********************************************************************/
 /*
 ** Multiqueue capable stack interface
 */
 /*
  * Multiqueue transmit entry point.
  *
  * Picks a transmit ring (flowid based when the mbuf carries a hash
  * type, current CPU otherwise), enqueues the mbuf on that ring's
  * buf_ring and then either transmits right away, if the TX lock can
  * be taken without blocking, or defers to the ring's taskqueue.
  *
  * return 0 on success, or the error reported by drbr_enqueue()
  */
 static int
 em_mq_start(if_t ifp, struct mbuf *m)
 {
 	struct adapter	*adapter = if_getsoftc(ifp);
 	struct tx_ring	*txr;
 	unsigned int	i;
 	int		error;
 
 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 		i = m->m_pkthdr.flowid % adapter->num_queues;
 	else
 		i = curcpu % adapter->num_queues;
 
 	txr = &adapter->tx_rings[i];
 
 	error = drbr_enqueue(ifp, txr->br, m);
 	if (error)
 		return (error);
 
 	if (EM_TX_TRYLOCK(txr)) {
 		em_mq_start_locked(ifp, txr);
 		EM_TX_UNLOCK(txr);
 	} else
 		taskqueue_enqueue(txr->tq, &txr->tx_task);
 
 	return (0);
 }
 
 /*
  * Multiqueue transmit worker; the caller holds the ring's TX lock.
  *
  * Moves packets from the ring's buf_ring to the hardware until the
  * buf_ring is empty, em_xmit() fails or the interface stops running.
  *
  * return ENETDOWN when the interface is not running or link is down,
  * otherwise the result of the last em_xmit() call (0 if none was made)
  */
 static int
 em_mq_start_locked(if_t ifp, struct tx_ring *txr)
 {
 	struct adapter	*adapter = txr->adapter;
 	struct mbuf	*m;
 	int		err = 0;
 	int		sent = 0;
 
 	EM_TX_LOCK_ASSERT(txr);
 
 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
 	    adapter->link_active == 0)
 		return (ENETDOWN);
 
 	/* Process the queue */
 	for (;;) {
 		m = drbr_peek(ifp, txr->br);
 		if (m == NULL)
 			break;
 
 		err = em_xmit(txr, &m);
 		if (err != 0) {
 			/*
 			 * A NULL mbuf means it was freed, so move forward.
 			 * Otherwise put back what is left; it may not be
 			 * the same mbuf since the transmit function may
 			 * have changed it.
 			 */
 			if (m == NULL)
 				drbr_advance(ifp, txr->br);
 			else
 				drbr_putback(ifp, txr->br, m);
 			break;
 		}
 
 		drbr_advance(ifp, txr->br);
 		sent++;
 
 		/* Account for the packet and tap it */
 		if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
 		if (m->m_flags & M_MCAST)
 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
 		ETHER_BPF_MTAP(ifp, m);
 
 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
 			break;
 	}
 
 	/* Mark the queue as having work */
 	if (sent > 0 && txr->busy == EM_TX_IDLE)
 		txr->busy = EM_TX_BUSY;
 
 	/* Try to reclaim descriptors; flag OACTIVE if still short */
 	if (txr->tx_avail < EM_MAX_SCATTER)
 		em_txeof(txr);
 	if (txr->tx_avail < EM_MAX_SCATTER)
 		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
 
 	return (err);
 }
 
 /*
 ** Flush all ring buffers: every mbuf still queued on a transmit
 ** ring's buf_ring is freed under that ring's TX lock, then the
 ** generic if_qflush() is run.
 */
 static void
 em_qflush(if_t ifp)
 {
 	struct adapter	*adapter = if_getsoftc(ifp);
 	struct tx_ring	*rings = adapter->tx_rings;
 	struct tx_ring	*txr;
 	struct mbuf	*m;
 	int		q;
 
 	for (q = 0; q < adapter->num_queues; q++) {
 		txr = &rings[q];
 
 		EM_TX_LOCK(txr);
 		for (;;) {
 			m = buf_ring_dequeue_sc(txr->br);
 			if (m == NULL)
 				break;
 			m_freem(m);
 		}
 		EM_TX_UNLOCK(txr);
 	}
 
 	if_qflush(ifp);
 }
 #endif /* EM_MULTIQUEUE */
 
 /*********************************************************************
  *  Ioctl entry point
  *
  *  em_ioctl is called when the user wants to configure the
  *  interface.  Commands not handled explicitly below are passed to
  *  ether_ioctl().  While the adapter is detaching (in_detach set)
  *  every request returns 0 without doing anything.
  *
  *  return 0 on success, positive on failure
  **********************************************************************/
 
 static int
 em_ioctl(if_t ifp, u_long command, caddr_t data)
 {
 	struct adapter	*adapter = if_getsoftc(ifp);
 	struct ifreq	*ifr = (struct ifreq *)data;
 #if defined(INET) || defined(INET6)
 	struct ifaddr	*ifa = (struct ifaddr *)data;
 #endif
 	bool		avoid_reset = FALSE;
 	int		error = 0;
 
 	/* error is still 0 here, so requests during detach report success */
 	if (adapter->in_detach)
 		return (error);
 
 	switch (command) {
 	case SIOCSIFADDR:
 		/* IPv4/IPv6 address changes do not need a full re-init */
 #ifdef INET
 		if (ifa->ifa_addr->sa_family == AF_INET)
 			avoid_reset = TRUE;
 #endif
 #ifdef INET6
 		if (ifa->ifa_addr->sa_family == AF_INET6)
 			avoid_reset = TRUE;
 #endif
 		/*
 		** Calling init results in link renegotiation,
 		** so we avoid doing it when possible.
 		*/
 		if (avoid_reset) {
 			if_setflagbits(ifp,IFF_UP,0);
 			/* Only initialize if not already running */
 			if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING))
 				em_init(adapter);
 #ifdef INET
 			if (!(if_getflags(ifp) & IFF_NOARP))
 				arp_ifinit(ifp, ifa);
 #endif
 		} else
 			error = ether_ioctl(ifp, command, data);
 		break;
 	case SIOCSIFMTU:
 	    {
 		int max_frame_size;
 
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
 
 		EM_CORE_LOCK(adapter);
 		/* The largest supported frame depends on the MAC type */
 		switch (adapter->hw.mac.type) {
 		case e1000_82571:
 		case e1000_82572:
 		case e1000_ich9lan:
 		case e1000_ich10lan:
 		case e1000_pch2lan:
 		case e1000_pch_lpt:
 		case e1000_82574:
 		case e1000_82583:
 		case e1000_80003es2lan:	/* 9K Jumbo Frame size */
 			max_frame_size = 9234;
 			break;
 		case e1000_pchlan:
 			max_frame_size = 4096;
 			break;
 			/* Adapters that do not support jumbo frames */
 		case e1000_ich8lan:
 			max_frame_size = ETHER_MAX_LEN;
 			break;
 		default:
 			max_frame_size = MAX_JUMBO_FRAME_SIZE;
 		}
 		/* Reject an MTU that does not fit once header and CRC are added */
 		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
 		    ETHER_CRC_LEN) {
 			EM_CORE_UNLOCK(adapter);
 			error = EINVAL;
 			break;
 		}
 
 		/* Accept the new MTU and re-initialize with the new frame size */
 		if_setmtu(ifp, ifr->ifr_mtu);
 		adapter->hw.mac.max_frame_size =
 		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
 		em_init_locked(adapter);
 		EM_CORE_UNLOCK(adapter);
 		break;
 	    }
 	case SIOCSIFFLAGS:
 		IOCTL_DEBUGOUT("ioctl rcv'd:\
 		    SIOCSIFFLAGS (Set Interface Flags)");
 		EM_CORE_LOCK(adapter);
 		if (if_getflags(ifp) & IFF_UP) {
 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 				/*
 				 * Already running: only reprogram promiscuous
 				 * mode when PROMISC/ALLMULTI changed since the
 				 * flags were last recorded.
 				 */
 				if ((if_getflags(ifp) ^ adapter->if_flags) &
 				    (IFF_PROMISC | IFF_ALLMULTI)) {
 					em_disable_promisc(adapter);
 					em_set_promisc(adapter);
 				}
 			} else
 				em_init_locked(adapter);
 		} else
 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
 				em_stop(adapter);
 		/* Remember the flags for the next comparison */
 		adapter->if_flags = if_getflags(ifp);
 		EM_CORE_UNLOCK(adapter);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 			EM_CORE_LOCK(adapter);
 			/* Update the multicast setup with interrupts disabled */
 			em_disable_intr(adapter);
 			em_set_multi(adapter);
 #ifdef DEVICE_POLLING
 			/* Interrupts stay off while polling is enabled */
 			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
 #endif
 				em_enable_intr(adapter);
 			EM_CORE_UNLOCK(adapter);
 		}
 		break;
 	case SIOCSIFMEDIA:
 		/* Check SOL/IDER usage */
 		EM_CORE_LOCK(adapter);
 		if (e1000_check_reset_block(&adapter->hw)) {
 			EM_CORE_UNLOCK(adapter);
 			device_printf(adapter->dev, "Media change is"
 			    " blocked due to SOL/IDER session.\n");
 			/* NOTE(review): error is left at 0 on this path */
 			break;
 		}
 		EM_CORE_UNLOCK(adapter);
 		/* falls thru */
 	case SIOCGIFMEDIA:
 		IOCTL_DEBUGOUT("ioctl rcv'd: \
 		    SIOCxIFMEDIA (Get/Set Interface Media)");
 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
 		break;
 	case SIOCSIFCAP:
 	    {
 		int mask, reinit;
 
 		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
 		reinit = 0;
 		/* mask holds the capability bits that differ from the current ones */
 		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
 #ifdef DEVICE_POLLING
 		if (mask & IFCAP_POLLING) {
 			if (ifr->ifr_reqcap & IFCAP_POLLING) {
 				/* Turning polling on: register, then mask interrupts */
 				error = ether_poll_register(em_poll, ifp);
 				if (error)
 					return (error);
 				EM_CORE_LOCK(adapter);
 				em_disable_intr(adapter);
 				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
 				EM_CORE_UNLOCK(adapter);
 			} else {
 				error = ether_poll_deregister(ifp);
 				/* Enable interrupt even in error case */
 				EM_CORE_LOCK(adapter);
 				em_enable_intr(adapter);
 				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
 				EM_CORE_UNLOCK(adapter);
 			}
 		}
 #endif
 		/* Each of the following toggles requests a re-init */
 		if (mask & IFCAP_HWCSUM) {
 			if_togglecapenable(ifp,IFCAP_HWCSUM);
 			reinit = 1;
 		}
 		if (mask & IFCAP_TSO4) {
 			if_togglecapenable(ifp,IFCAP_TSO4);
 			reinit = 1;
 		}
 		if (mask & IFCAP_VLAN_HWTAGGING) {
 			if_togglecapenable(ifp,IFCAP_VLAN_HWTAGGING);
 			reinit = 1;
 		}
 		if (mask & IFCAP_VLAN_HWFILTER) {
 			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
 			reinit = 1;
 		}
 		if (mask & IFCAP_VLAN_HWTSO) {
 			if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
 			reinit = 1;
 		}
 		/* WOL toggles do not set reinit */
 		if ((mask & IFCAP_WOL) &&
 		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
 			if (mask & IFCAP_WOL_MCAST)
 				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
 			if (mask & IFCAP_WOL_MAGIC)
 				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
 		}
 		/* Re-initialize only if something changed and we are running */
 		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
 			em_init(adapter);
 		if_vlancap(ifp);
 		break;
 	    }
 
 	default:
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 
 	return (error);
 }
 
 
 /*********************************************************************
  *  Init entry point
  *
  *  This routine is used in two ways. It is used by the stack as
  *  init entry point in network interface structure. It is also used
  *  by the driver as a hw/sw initialization routine to get to a
  *  consistent state.
  *
  *  The caller must hold the core lock (asserted below).
  *
  *  Nothing is returned: if the receive structures cannot be set up
  *  a message is printed, the adapter is stopped and the routine
  *  returns early.
  **********************************************************************/
 
 static void
 em_init_locked(struct adapter *adapter)
 {
 	if_t ifp = adapter->ifp;
 	device_t	dev = adapter->dev;
 
 	INIT_DEBUGOUT("em_init: begin");
 
 	EM_CORE_LOCK_ASSERT(adapter);
 
 	/* Quiesce interrupts and the timer before reprogramming */
 	em_disable_intr(adapter);
 	callout_stop(&adapter->timer);
 
 	/* Get the latest mac address, User can use a LAA */
         bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
               ETHER_ADDR_LEN);
 
 	/* Put the address into the Receive Address Array */
 	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
 
 	/*
 	 * With the 82571 adapter, RAR[0] may be overwritten
 	 * when the other port is reset, we make a duplicate
 	 * in RAR[14] for that eventuality, this assures
 	 * the interface continues to function.
 	 */
 	if (adapter->hw.mac.type == e1000_82571) {
 		e1000_set_laa_state_82571(&adapter->hw, TRUE);
 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr,
 		    E1000_RAR_ENTRIES - 1);
 	}
 
 	/* Initialize the hardware */
 	em_reset(adapter);
 	em_update_link_status(adapter);
 
 	/* Setup VLAN support, basic and offload if available */
 	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
 
 	/* Set hardware offload abilities from the enabled capabilities */
 	if_clearhwassist(ifp);
 	if (if_getcapenable(ifp) & IFCAP_TXCSUM)
 		if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
 	if (if_getcapenable(ifp) & IFCAP_TSO4)
 		if_sethwassistbits(ifp, CSUM_TSO, 0);
 
 	/* Configure for OS presence */
 	em_init_manageability(adapter);
 
 	/* Prepare transmit descriptors and buffers */
 	em_setup_transmit_structures(adapter);
 	em_initialize_transmit_unit(adapter);
 
 	/* Setup Multicast table */
 	em_set_multi(adapter);
 
 	/*
 	** Figure out the desired mbuf
 	** pool for doing jumbos: the receive buffer size is
 	** chosen from the current maximum frame size.
 	*/
 	if (adapter->hw.mac.max_frame_size <= 2048)
 		adapter->rx_mbuf_sz = MCLBYTES;
 	else if (adapter->hw.mac.max_frame_size <= 4096)
 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
 	else
 		adapter->rx_mbuf_sz = MJUM9BYTES;
 
 	/* Prepare receive descriptors and buffers */
 	if (em_setup_receive_structures(adapter)) {
 		device_printf(dev, "Could not setup receive structures\n");
 		em_stop(adapter);
 		return;
 	}
 	em_initialize_receive_unit(adapter);
 
 	/* Use real VLAN Filter support? */
 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
 		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
 			/* Use real VLAN Filter support */
 			em_setup_vlan_hw_support(adapter);
 		else {
 			/* Tagging without filtering: only set CTRL.VME */
 			u32 ctrl;
 			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
 			ctrl |= E1000_CTRL_VME;
 			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
 		}
 	}
 
 	/* Don't lose promiscuous settings */
 	em_set_promisc(adapter);
 
 	/* Set the interface as ACTIVE: RUNNING set, OACTIVE cleared */
 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
 
 	/* Restart the local timer, first expiry after hz ticks */
 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
 	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
 
 	/* MSI/X configuration for 82574 */
 	if (adapter->hw.mac.type == e1000_82574) {
 		int tmp;
 		tmp = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 		tmp |= E1000_CTRL_EXT_PBA_CLR;
 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, tmp);
 		/* Set the IVAR - interrupt vector routing. */
 		E1000_WRITE_REG(&adapter->hw, E1000_IVAR, adapter->ivars);
 	}
 
 #ifdef DEVICE_POLLING
 	/*
 	 * Only enable interrupts if we are not polling, make sure
 	 * they are off otherwise.
 	 */
 	if (if_getcapenable(ifp) & IFCAP_POLLING)
 		em_disable_intr(adapter);
 	else
 #endif /* DEVICE_POLLING */
 		em_enable_intr(adapter);
 
 	/* AMT based hardware can now take control from firmware */
 	if (adapter->has_manage && adapter->has_amt)
 		em_get_hw_control(adapter);
 }
 
 /*
  * Unlocked init wrapper: takes the core lock around em_init_locked().
  * 'arg' is the adapter softc.
  */
 static void
 em_init(void *arg)
 {
 	struct adapter	*adapter;
 
 	adapter = arg;
 
 	EM_CORE_LOCK(adapter);
 	em_init_locked(adapter);
 	EM_CORE_UNLOCK(adapter);
 }
 
 
 #ifdef DEVICE_POLLING
 /*********************************************************************
  *
  *  Legacy polling routine: note this only works with single queue
  *
  *  For POLL_AND_CHECK_STATUS the interrupt cause register is read
  *  and the link state refreshed when a link related cause is set.
  *  Receive and transmit cleanup then run on the first rings.
  *
  *  return 0 when the interface is not running, otherwise the
  *  rx_done value filled in through em_rxeof()
  *
  *********************************************************************/
 static int
 em_poll(if_t ifp, enum poll_cmd cmd, int count)
 {
 	struct adapter	*adapter = if_getsoftc(ifp);
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct rx_ring	*rxr = adapter->rx_rings;
 	u32		reg_icr;
 	int		rx_done;
 	bool		running;
 
 	EM_CORE_LOCK(adapter);
 	running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0;
 	if (running && cmd == POLL_AND_CHECK_STATUS) {
 		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
 			/* Refresh link state with the timer paused */
 			callout_stop(&adapter->timer);
 			adapter->hw.mac.get_link_status = 1;
 			em_update_link_status(adapter);
 			callout_reset(&adapter->timer, hz,
 			    em_local_timer, adapter);
 		}
 	}
 	EM_CORE_UNLOCK(adapter);
 
 	if (!running)
 		return (0);
 
 	em_rxeof(rxr, count, &rx_done);
 
 	/* Clean the transmit ring and restart any pending output */
 	EM_TX_LOCK(txr);
 	em_txeof(txr);
 #ifdef EM_MULTIQUEUE
 	if (!drbr_empty(ifp, txr->br))
 		em_mq_start_locked(ifp, txr);
 #else
 	if (!if_sendq_empty(ifp))
 		em_start_locked(ifp, txr);
 #endif
 	EM_TX_UNLOCK(txr);
 
 	return (rx_done);
 }
 #endif /* DEVICE_POLLING */
 
 
 /*********************************************************************
  *
  *  Fast Legacy/MSI Combined Interrupt Service routine
  *
  *  Reads the interrupt cause register, filters out interrupts that
  *  are not ours, then masks interrupts and hands the work to the
  *  adapter taskqueue (plus the link task on a link event).
  *
  *  return FILTER_STRAY when the interrupt is not ours,
  *  FILTER_HANDLED otherwise
  *
  *********************************************************************/
 static int
 em_irq_fast(void *arg)
 {
 	struct adapter	*adapter = arg;
 	u32		reg_icr;
 
 	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 
 	/* Hot eject?  */
 	if (reg_icr == 0xffffffff)
 		return FILTER_STRAY;
 
 	/* Definitely not our interrupt.  */
 	if (reg_icr == 0x0)
 		return FILTER_STRAY;
 
 	/*
 	 * Starting with the 82571 chip, bit 31 should be used to
 	 * determine whether the interrupt belongs to us.
 	 */
 	if (adapter->hw.mac.type >= e1000_82571 &&
 	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
 		return FILTER_STRAY;
 
 	/* Interrupts stay masked until the queue task re-enables them */
 	em_disable_intr(adapter);
 	taskqueue_enqueue(adapter->tq, &adapter->que_task);
 
 	/* Link status change */
 	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
 		adapter->hw.mac.get_link_status = 1;
 		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
 	}
 
 	if (reg_icr & E1000_ICR_RXO)
 		adapter->rx_overruns++;
 	return FILTER_HANDLED;
 }
 
 /* Combined RX/TX handler, used by Legacy and MSI */
 static void
 em_handle_que(void *context, int pending)
 {
 	struct adapter	*adapter = context;
 	if_t ifp = adapter->ifp;
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct rx_ring	*rxr = adapter->rx_rings;
 
 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
 
 		EM_TX_LOCK(txr);
 		em_txeof(txr);
 #ifdef EM_MULTIQUEUE
 		if (!drbr_empty(ifp, txr->br))
 			em_mq_start_locked(ifp, txr);
 #else
 		if (!if_sendq_empty(ifp))
 			em_start_locked(ifp, txr);
 #endif
 		EM_TX_UNLOCK(txr);
 		if (more) {
 			taskqueue_enqueue(adapter->tq, &adapter->que_task);
 			return;
 		}
 	}
 
 	em_enable_intr(adapter);
 	return;
 }
 
 
 /*********************************************************************
  *
  *  MSIX Interrupt Service Routines
  *
  *  em_msix_tx: transmit vector handler.  'arg' is the tx_ring.  The
  *  ring is cleaned and pending output restarted under the TX lock,
  *  then the ring's interrupt is unmasked again through IMS.
  *
  **********************************************************************/
 static void
 em_msix_tx(void *arg)
 {
 	struct tx_ring	*txr;
 	struct adapter	*adapter;
 	if_t		ifp;
 
 	txr = arg;
 	adapter = txr->adapter;
 	ifp = adapter->ifp;
 
 	txr->tx_irq++;
 
 	EM_TX_LOCK(txr);
 	em_txeof(txr);
 #ifdef EM_MULTIQUEUE
 	if (!drbr_empty(ifp, txr->br))
 		em_mq_start_locked(ifp, txr);
 #else
 	if (!if_sendq_empty(ifp))
 		em_start_locked(ifp, txr);
 #endif
 
 	/* Reenable this interrupt */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
 	EM_TX_UNLOCK(txr);
 }
 
 /*********************************************************************
  *
  *  MSI-X RX interrupt service routine
  *
  *  arg is the struct rx_ring registered for this vector.  The interrupt
  *  is counted unconditionally; if the interface is not running nothing
  *  else happens (the vector is not re-enabled here).  Otherwise up to
  *  adapter->rx_process_limit packets are processed: when em_rxeof()
  *  reports more work the ring's RX task is queued, else the ring's bit
  *  is written to IMS to re-enable the interrupt.
  *
  **********************************************************************/
 
 static void
 em_msix_rx(void *arg)
 {
 	struct rx_ring	*ring = arg;
 	struct adapter	*adapter = ring->adapter;
 
 	ring->rx_irq++;
 
 	if ((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0)
 		return;
 
 	if (em_rxeof(ring, adapter->rx_process_limit, NULL)) {
 		/* More to do: hand the remainder to the taskqueue. */
 		taskqueue_enqueue(ring->tq, &ring->rx_task);
 		return;
 	}
 
 	/* Reenable this interrupt */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, ring->ims);
 }
 
 /*********************************************************************
  *
  *  MSI-X link interrupt service routine
  *
  *  arg is the struct adapter.  Reads ICR, accounts RX overruns, and on
  *  a link-related cause (RXSEQ or LSC) flags the link status as stale
  *  and runs em_handle_link() directly; otherwise it re-enables the link
  *  interrupt itself.
  *
  **********************************************************************/
 static void
 em_msix_link(void *arg)
 {
 	struct adapter	*adapter = arg;
 	u32		 icr;
 
 	adapter->link_irq++;
 	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
 
 	if ((icr & E1000_ICR_RXO) != 0)
 		adapter->rx_overruns++;
 
 	if ((icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) == 0) {
 		/* Not a link event: just unmask the link vector again. */
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
 		    EM_MSIX_LINK | E1000_IMS_LSC);
 	} else {
 		adapter->hw.mac.get_link_status = 1;
 		em_handle_link(adapter, 0);
 	}
 
 	/*
 	 * Reading ICR for this interrupt may have cleared other pending
 	 * causes through autoclear, so whenever anything was set, raise
 	 * a software interrupt for every vector in adapter->ims.
 	 */
 	if (icr != 0)
 		E1000_WRITE_REG(&adapter->hw, E1000_ICS, adapter->ims);
 }
 
 /*
  * Taskqueue handler that continues RX processing for one ring.
  *
  * context: the struct rx_ring to service.
  * pending: unused.
  *
  * Processes up to adapter->rx_process_limit packets.  If em_rxeof()
  * reports more work the task re-queues itself; otherwise the ring's
  * interrupt is re-enabled through IMS.
  */
 static void
 em_handle_rx(void *context, int pending)
 {
 	struct rx_ring	*ring = context;
 	struct adapter	*adapter = ring->adapter;
 
 	if (em_rxeof(ring, adapter->rx_process_limit, NULL)) {
 		taskqueue_enqueue(ring->tq, &ring->rx_task);
 		return;
 	}
 
 	/* Reenable this interrupt */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, ring->ims);
 }
 
 /*
  * Taskqueue handler that services one TX ring.
  *
  * context: the struct tx_ring to service.
  * pending: unused.
  *
  * With the TX lock held: reclaims completed descriptors, restarts
  * transmission when frames are queued, and writes the ring's bit to IMS
  * to re-enable its interrupt.
  */
 static void
 em_handle_tx(void *context, int pending)
 {
 	struct tx_ring	*ring = context;
 	struct adapter	*adapter = ring->adapter;
 	if_t		 ifp = adapter->ifp;
 
 	EM_TX_LOCK(ring);
 
 	em_txeof(ring);
 #ifdef EM_MULTIQUEUE
 	if (!drbr_empty(ifp, ring->br)) {
 		em_mq_start_locked(ifp, ring);
 	}
 #else
 	if (!if_sendq_empty(ifp)) {
 		em_start_locked(ifp, ring);
 	}
 #endif
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, ring->ims);
 
 	EM_TX_UNLOCK(ring);
 }
 
 /*
  * Link-change handler, used both as a taskqueue task and called directly
  * from em_msix_link().
  *
  * context: the struct adapter.
  * pending: unused.
  *
  * Does nothing when the interface is not running.  Otherwise, under the
  * core lock, it stops the local timer, refreshes the link status, re-arms
  * the timer for one second (hz ticks) and re-enables the link interrupt.
  * If the link is active afterwards, each TX queue is kicked so that frames
  * queued while the link was down get transmitted.
  */
 static void
 em_handle_link(void *context, int pending)
 {
 	struct adapter	*adapter = context;
 	struct tx_ring	*txr = adapter->tx_rings;
 	if_t ifp = adapter->ifp;
 
 	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
 		return;
 
 	EM_CORE_LOCK(adapter);
 	callout_stop(&adapter->timer);
 	em_update_link_status(adapter);
 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS,
 	    EM_MSIX_LINK | E1000_IMS_LSC);
 	if (adapter->link_active) {
 		for (int i = 0; i < adapter->num_queues; i++, txr++) {
 			EM_TX_LOCK(txr);
 #ifdef EM_MULTIQUEUE
 			if (!drbr_empty(ifp, txr->br))
 				em_mq_start_locked(ifp, txr);
 #else
 			/*
 			 * Restart transmission only when something is
 			 * queued, matching the other TX service paths.
 			 */
 			if (!if_sendq_empty(ifp))
 				em_start_locked(ifp, txr);
 #endif
 			EM_TX_UNLOCK(txr);
 		}
 	}
 	EM_CORE_UNLOCK(adapter);
 }
 
 
 /*********************************************************************
  *
  *  Media Ioctl callback
  *
  *  Called whenever the user queries the status of the interface
  *  (e.g. with ifconfig).  Refreshes the link state under the core lock
  *  and fills in ifmr: the status is always marked valid and the media
  *  Ethernet; when the link is up the active flag, the speed subtype and
  *  the duplex setting are added.
  *
  **********************************************************************/
 static void
 em_media_status(if_t ifp, struct ifmediareq *ifmr)
 {
 	struct adapter *adapter = if_getsoftc(ifp);
 	u_char fiber_media = IFM_1000_SX;
 
 	INIT_DEBUGOUT("em_media_status: begin");
 
 	EM_CORE_LOCK(adapter);
 	em_update_link_status(adapter);
 
 	ifmr->ifm_status = IFM_AVALID;
 	ifmr->ifm_active = IFM_ETHER;
 
 	if (adapter->link_active) {
 		ifmr->ifm_status |= IFM_ACTIVE;
 
 		if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
 		    adapter->hw.phy.media_type ==
 		    e1000_media_type_internal_serdes) {
 			/* Fiber and serdes are reported as 1000SX, full duplex. */
 			ifmr->ifm_active |= fiber_media | IFM_FDX;
 		} else {
 			if (adapter->link_speed == 10)
 				ifmr->ifm_active |= IFM_10_T;
 			else if (adapter->link_speed == 100)
 				ifmr->ifm_active |= IFM_100_TX;
 			else if (adapter->link_speed == 1000)
 				ifmr->ifm_active |= IFM_1000_T;
 
 			ifmr->ifm_active |=
 			    (adapter->link_duplex == FULL_DUPLEX) ?
 			    IFM_FDX : IFM_HDX;
 		}
 	}
 
 	EM_CORE_UNLOCK(adapter);
 }
 
 /*********************************************************************
  *
  *  Media Ioctl callback
  *
  *  Called when the user changes speed/duplex using the media/mediaopt
  *  options of ifconfig.  Translates the selected media subtype into
  *  autonegotiation or forced speed/duplex settings and then
  *  re-initializes the adapter.
  *
  *  Returns EINVAL when the selected media is not Ethernet, 0 otherwise.
  *  An unsupported subtype is only logged; the adapter is still
  *  re-initialized and 0 is returned.
  *
  **********************************************************************/
 static int
 em_media_change(if_t ifp)
 {
 	struct adapter *adapter = if_getsoftc(ifp);
 	struct ifmedia  *ifm = &adapter->media;
 	bool full_duplex;
 
 	INIT_DEBUGOUT("em_media_change: begin");
 
 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
 		return (EINVAL);
 
 	EM_CORE_LOCK(adapter);
 	full_duplex = ((ifm->ifm_media & IFM_GMASK) == IFM_FDX);
 
 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
 	case IFM_AUTO:
 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
 		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
 		break;
 	case IFM_1000_LX:
 	case IFM_1000_SX:
 	case IFM_1000_T:
 		/* Gigabit is negotiated, advertising 1000/full only. */
 		adapter->hw.mac.autoneg = DO_AUTO_NEG;
 		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
 		break;
 	case IFM_100_TX:
 		adapter->hw.mac.autoneg = FALSE;
 		adapter->hw.phy.autoneg_advertised = 0;
 		adapter->hw.mac.forced_speed_duplex = full_duplex ?
 		    ADVERTISE_100_FULL : ADVERTISE_100_HALF;
 		break;
 	case IFM_10_T:
 		adapter->hw.mac.autoneg = FALSE;
 		adapter->hw.phy.autoneg_advertised = 0;
 		adapter->hw.mac.forced_speed_duplex = full_duplex ?
 		    ADVERTISE_10_FULL : ADVERTISE_10_HALF;
 		break;
 	default:
 		device_printf(adapter->dev, "Unsupported media type\n");
 		break;
 	}
 
 	em_init_locked(adapter);
 	EM_CORE_UNLOCK(adapter);
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  This routine maps the mbufs to tx descriptors.
  *
  *  txr:     the TX ring to place the frame on.
  *  m_headp: in/out pointer to the mbuf chain.  The chain may be
  *           replaced (m_dup/m_pullup/m_defrag); on allocation or DMA
  *           load failure it is freed and *m_headp is set to NULL.
  *           When the ring simply lacks free descriptors the chain is
  *           left intact in *m_headp and ENOBUFS is returned.
  *
  *  return 0 on success, positive on failure
  **********************************************************************/
 
 static int
 em_xmit(struct tx_ring *txr, struct mbuf **m_headp)
 {
 	struct adapter		*adapter = txr->adapter;
 	bus_dma_segment_t	segs[EM_MAX_SCATTER];
 	bus_dmamap_t		map;
-	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
+	struct em_txbuffer	*tx_buffer, *tx_buffer_mapped;
 	struct e1000_tx_desc	*ctxd = NULL;
 	struct mbuf		*m_head;
 	struct ether_header	*eh;
 	struct ip		*ip = NULL;
 	struct tcphdr		*tp = NULL;
 	u32			txd_upper = 0, txd_lower = 0;
 	int			ip_off, poff;
 	int			nsegs, i, j, first, last = 0;
 	int			error;
 	bool			do_tso, tso_desc, remap = TRUE;
 
 	m_head = *m_headp;
 	do_tso = (m_head->m_pkthdr.csum_flags & CSUM_TSO);
 	tso_desc = FALSE;
 	ip_off = poff = 0;
 
 	/*
 	 * Intel recommends entire IP/TCP header length reside in a single
 	 * buffer. If multiple descriptors are used to describe the IP and
 	 * TCP header, each descriptor should describe one or more
 	 * complete headers; descriptors referencing only parts of headers
 	 * are not supported. If all layer headers are not coalesced into
 	 * a single buffer, each buffer should not cross a 4KB boundary,
 	 * or be larger than the maximum read request size.
 	 * Controller also requires modifying IP/TCP header to make TSO work
 	 * so we firstly get a writable mbuf chain then coalesce ethernet/
 	 * IP/TCP header into a single buffer to meet the requirement of
 	 * controller. This also simplifies IP/TCP/UDP checksum offloading
 	 * which also has similar restrictions.
 	 */
 	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
 		if (do_tso || (m_head->m_next != NULL && 
 		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
 			/* Headers get modified below: work on a private copy. */
 			if (M_WRITABLE(*m_headp) == 0) {
 				m_head = m_dup(*m_headp, M_NOWAIT);
 				m_freem(*m_headp);
 				if (m_head == NULL) {
 					*m_headp = NULL;
 					return (ENOBUFS);
 				}
 				*m_headp = m_head;
 			}
 		}
 		/*
 		 * XXX
 		 * Assume IPv4, we don't have TSO/checksum offload support
 		 * for IPv6 yet.
 		 */
 		ip_off = sizeof(struct ether_header);
 		if (m_head->m_len < ip_off) {
 			m_head = m_pullup(m_head, ip_off);
 			if (m_head == NULL) {
 				*m_headp = NULL;
 				return (ENOBUFS);
 			}
 		}
 		eh = mtod(m_head, struct ether_header *);
 		/* A VLAN-encapsulated frame has a longer link-level header. */
 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
 			ip_off = sizeof(struct ether_vlan_header);
 			if (m_head->m_len < ip_off) {
 				m_head = m_pullup(m_head, ip_off);
 				if (m_head == NULL) {
 					*m_headp = NULL;
 					return (ENOBUFS);
 				}
 			}
 		}
 		if (m_head->m_len < ip_off + sizeof(struct ip)) {
 			m_head = m_pullup(m_head, ip_off + sizeof(struct ip));
 			if (m_head == NULL) {
 				*m_headp = NULL;
 				return (ENOBUFS);
 			}
 		}
 		ip = (struct ip *)(mtod(m_head, char *) + ip_off);
 		/* poff: offset of the transport header from the frame start. */
 		poff = ip_off + (ip->ip_hl << 2);
 
 		if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP)) {
 			if (m_head->m_len < poff + sizeof(struct tcphdr)) {
 				m_head = m_pullup(m_head, poff +
 				    sizeof(struct tcphdr));
 				if (m_head == NULL) {
 					*m_headp = NULL;
 					return (ENOBUFS);
 				}
 			}
 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
 			/*
 			 * TSO workaround:
 			 *   pull 4 more bytes of data into it.
 			 */
 			if (m_head->m_len < poff + (tp->th_off << 2)) {
 				m_head = m_pullup(m_head, poff +
 				                 (tp->th_off << 2) +
 				                 TSO_WORKAROUND);
 				if (m_head == NULL) {
 					*m_headp = NULL;
 					return (ENOBUFS);
 				}
 			}
 			/* m_pullup may have moved the data: refresh pointers. */
 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
 			tp = (struct tcphdr *)(mtod(m_head, char *) + poff);
 			if (do_tso) {
 				ip->ip_len = htons(m_head->m_pkthdr.tso_segsz +
 				                  (ip->ip_hl << 2) +
 				                  (tp->th_off << 2));
 				ip->ip_sum = 0;
 				/*
 				 * The pseudo TCP checksum does not include TCP
 				 * payload length so driver should recompute
 				 * the checksum here what hardware expect to
 				 * see. This is adherence of Microsoft's Large
 				 * Send specification.
 			 	*/
 				tp->th_sum = in_pseudo(ip->ip_src.s_addr,
 				    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
 			}
 		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
 			if (m_head->m_len < poff + sizeof(struct udphdr)) {
 				m_head = m_pullup(m_head, poff +
 				    sizeof(struct udphdr));
 				if (m_head == NULL) {
 					*m_headp = NULL;
 					return (ENOBUFS);
 				}
 			}
 			ip = (struct ip *)(mtod(m_head, char *) + ip_off);
 		}
 		*m_headp = m_head;
 	}
 
 	/*
 	 * Map the packet for DMA
 	 *
 	 * Capture the first descriptor index,
 	 * this descriptor will have the index
 	 * of the EOP which is the only one that
 	 * now gets a DONE bit writeback.
 	 */
 	first = txr->next_avail_desc;
 	tx_buffer = &txr->tx_buffers[first];
 	tx_buffer_mapped = tx_buffer;
 	map = tx_buffer->map;
 
 retry:
 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
 	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
 
 	/*
 	 * There are two types of errors we can (try) to handle:
 	 * - EFBIG means the mbuf chain was too long and bus_dma ran
 	 *   out of segments.  Defragment the mbuf chain and try again.
 	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
 	 *   at this point in time.  Defer sending and try again later.
 	 * All other errors, in particular EINVAL, are fatal and prevent the
 	 * mbuf chain from ever going through.  Drop it and report error.
 	 */
 	if (error == EFBIG && remap) {
 		struct mbuf *m;
 
 		m = m_defrag(*m_headp, M_NOWAIT);
 		if (m == NULL) {
 			adapter->mbuf_alloc_failed++;
 			m_freem(*m_headp);
 			*m_headp = NULL;
 			return (ENOBUFS);
 		}
 		*m_headp = m;
 
 		/* Try it again, but only once */
 		remap = FALSE;
 		goto retry;
 	} else if (error != 0) {
 		/* NOTE(review): as written, ENOMEM also frees the chain here. */
 		adapter->no_tx_dma_setup++;
 		m_freem(*m_headp);
 		*m_headp = NULL;
 		return (error);
 	}
 
 	/*
 	 * TSO Hardware workaround, if this packet is not
 	 * TSO, and is only a single descriptor long, and
 	 * it follows a TSO burst, then we need to add a
 	 * sentinel descriptor to prevent premature writeback.
 	 */
 	if ((!do_tso) && (txr->tx_tso == TRUE)) {
 		if (nsegs == 1)
 			tso_desc = TRUE;
 		txr->tx_tso = FALSE;
 	}
 
 	/*
 	 * Not enough free descriptors (keeping EM_MAX_SCATTER in reserve):
 	 * undo the DMA mapping but leave the mbuf chain in *m_headp.
 	 */
         if (nsegs > (txr->tx_avail - EM_MAX_SCATTER)) {
                 txr->no_desc_avail++;
 		bus_dmamap_unload(txr->txtag, map);
 		return (ENOBUFS);
         }
 	m_head = *m_headp;
 
 	/* Do hardware assists */
 	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
 		em_tso_setup(txr, m_head, ip_off, ip, tp,
 		    &txd_upper, &txd_lower);
 		/* we need to make a final sentinel transmit desc */
 		tso_desc = TRUE;
 	} else if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
 		em_transmit_checksum_setup(txr, m_head,
 		    ip_off, ip, &txd_upper, &txd_lower);
 
 	if (m_head->m_flags & M_VLANTAG) {
 		/* Set the vlan id. */
 		txd_upper |= htole16(if_getvtag(m_head)) << 16;
                 /* Tell hardware to add tag */
                 txd_lower |= htole32(E1000_TXD_CMD_VLE);
         }
 
 	i = txr->next_avail_desc;
 
 	/* Set up our transmit descriptors */
 	for (j = 0; j < nsegs; j++) {
 		bus_size_t seg_len;
 		bus_addr_t seg_addr;
 
 		tx_buffer = &txr->tx_buffers[i];
 		ctxd = &txr->tx_base[i];
 		seg_addr = segs[j].ds_addr;
 		seg_len  = segs[j].ds_len;
 		/*
 		** TSO Workaround:
 		** If this is the last descriptor, we want to
 		** split it so we have a small final sentinel
 		*/
 		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
 			seg_len -= TSO_WORKAROUND;
 			ctxd->buffer_addr = htole64(seg_addr);
 			ctxd->lower.data = htole32(
 				adapter->txd_cmd | txd_lower | seg_len);
 			ctxd->upper.data = htole32(txd_upper);
 			if (++i == adapter->num_tx_desc)
 				i = 0;
 
 			/* Now make the sentinel */	
 			/* The sentinel consumes one extra descriptor. */
 			txr->tx_avail--;
 			ctxd = &txr->tx_base[i];
 			tx_buffer = &txr->tx_buffers[i];
 			ctxd->buffer_addr =
 			    htole64(seg_addr + seg_len);
 			ctxd->lower.data = htole32(
 			adapter->txd_cmd | txd_lower | TSO_WORKAROUND);
 			ctxd->upper.data =
 			    htole32(txd_upper);
 			last = i;
 			if (++i == adapter->num_tx_desc)
 				i = 0;
 		} else {
 			ctxd->buffer_addr = htole64(seg_addr);
 			ctxd->lower.data = htole32(
 			adapter->txd_cmd | txd_lower | seg_len);
 			ctxd->upper.data = htole32(txd_upper);
 			last = i;
 			if (++i == adapter->num_tx_desc)
 				i = 0;
 		}
 		tx_buffer->m_head = NULL;
 		tx_buffer->next_eop = -1;
 	}
 
 	txr->next_avail_desc = i;
 	txr->tx_avail -= nsegs;
 
 	/* The mbuf chain is recorded on the buffer of the last descriptor. */
         tx_buffer->m_head = m_head;
 	/*
 	** Here we swap the map so the last descriptor,
 	** which gets the completion interrupt has the
 	** real map, and the first descriptor gets the
 	** unused map from this descriptor.
 	*/
 	tx_buffer_mapped->map = tx_buffer->map;
 	tx_buffer->map = map;
         bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
 
         /*
          * Last Descriptor of Packet
 	 * needs End Of Packet (EOP)
 	 * and Report Status (RS)
          */
         ctxd->lower.data |=
 	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
 	/*
 	 * Keep track in the first buffer which
 	 * descriptor will be written back
 	 */
 	tx_buffer = &txr->tx_buffers[first];
 	tx_buffer->next_eop = last;
 
 	/*
 	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
 	 * that this frame is available to transmit.
 	 */
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
 
 	return (0);
 }
 
 /*
  * Program the receive control register (RCTL) according to the
  * interface's IFF_PROMISC / IFF_ALLMULTI flags.
  *
  * IFF_PROMISC enables both unicast and multicast promiscuous mode (and
  * "store bad packets" when em_debug_sbp is set).  IFF_ALLMULTI alone
  * enables multicast promiscuous mode and clears unicast promiscuous
  * mode.  With neither flag set the register is left untouched.
  */
 static void
 em_set_promisc(struct adapter *adapter)
 {
 	if_t	ifp = adapter->ifp;
 	u32	rctl;
 
 	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 
 	if (if_getflags(ifp) & IFF_PROMISC) {
 		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
 		/* Turn this on if you want to see bad packets */
 		if (em_debug_sbp)
 			rctl |= E1000_RCTL_SBP;
 	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
 		rctl = (rctl | E1000_RCTL_MPE) & ~E1000_RCTL_UPE;
 	} else {
 		/* Neither mode requested: nothing to write. */
 		return;
 	}
 
 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
 }
 
 /*
  * Turn promiscuous reception off in RCTL.
  *
  * Unicast promiscuous mode and "store bad packets" are always cleared.
  * Multicast promiscuous mode is cleared only when the interface is not
  * in IFF_ALLMULTI and has fewer than MAX_NUM_MULTICAST_ADDRESSES
  * multicast groups.
  */
 static void
 em_disable_promisc(struct adapter *adapter)
 {
 	if_t	ifp = adapter->ifp;
 	u32	rctl;
 	int	ngroups;
 
 	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 
 	ngroups = (if_getflags(ifp) & IFF_ALLMULTI) ?
 	    MAX_NUM_MULTICAST_ADDRESSES :
 	    if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
 
 	rctl &= ~E1000_RCTL_UPE;
 	rctl &= ~E1000_RCTL_SBP;
 	/* Don't disable if in MAX groups */
 	if (ngroups < MAX_NUM_MULTICAST_ADDRESSES)
 		rctl &= ~E1000_RCTL_MPE;
 
 	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
 }
 
 
 /*********************************************************************
  *  Multicast Update
  *
  *  This routine is called whenever multicast address list is updated.
  *
  **********************************************************************/
 
 static void
 em_set_multi(struct adapter *adapter)
 {
 	if_t ifp = adapter->ifp;
 	u32 reg_rctl = 0;
 	u8  *mta; /* Multicast array memory */
 	int mcnt = 0;
 
 	IOCTL_DEBUGOUT("em_set_multi: begin");
 
 	mta = adapter->mta;
 	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);
 
 	if (adapter->hw.mac.type == e1000_82542 && 
 	    adapter->hw.revision_id == E1000_REVISION_2) {
 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
 			e1000_pci_clear_mwi(&adapter->hw);
 		reg_rctl |= E1000_RCTL_RST;
 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
 		msec_delay(5);
 	}
 
 	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);
 
 	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 		reg_rctl |= E1000_RCTL_MPE;
 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
 	} else
 		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
 
 	if (adapter->hw.mac.type == e1000_82542 && 
 	    adapter->hw.revision_id == E1000_REVISION_2) {
 		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 		reg_rctl &= ~E1000_RCTL_RST;
 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
 		msec_delay(5);
 		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
 			e1000_pci_set_mwi(&adapter->hw);
 	}
 }
 
 
 /*********************************************************************
  *  Timer routine
  *
  *  This routine checks for link status and updates statistics.
  *
  **********************************************************************/
 
 static void
 em_local_timer(void *arg)
 {
 	struct adapter	*adapter = arg;
 	if_t ifp = adapter->ifp;
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct rx_ring	*rxr = adapter->rx_rings;
 	u32		trigger = 0;
 
 	EM_CORE_LOCK_ASSERT(adapter);
 
 	em_update_link_status(adapter);
 	em_update_stats_counters(adapter);
 
 	/* Reset LAA into RAR[0] on 82571 */
 	if ((adapter->hw.mac.type == e1000_82571) &&
 	    e1000_get_laa_state_82571(&adapter->hw))
 		e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
 
 	/* Mask to use in the irq trigger */
 	if (adapter->msix_mem) {
 		for (int i = 0; i < adapter->num_queues; i++, rxr++)
 			trigger |= rxr->ims;
 		rxr = adapter->rx_rings;
 	} else
 		trigger = E1000_ICS_RXDMT0;
 
 	/*
 	** Check on the state of the TX queue(s), this 
 	** can be done without the lock because its RO
 	** and the HUNG state will be static if set.
 	*/
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		if (txr->busy == EM_TX_HUNG)
 			goto hung;
 		if (txr->busy >= EM_TX_MAXTRIES)
 			txr->busy = EM_TX_HUNG;
 		/* Schedule a TX tasklet if needed */
 		if (txr->tx_avail <= EM_MAX_SCATTER)
 			taskqueue_enqueue(txr->tq, &txr->tx_task);
 	}
 	
 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
 #ifndef DEVICE_POLLING
 	/* Trigger an RX interrupt to guarantee mbuf refresh */
 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
 #endif
 	return;
 hung:
 	/* Looks like we're hung */
 	device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n",
 			txr->me);
 	em_print_debug_info(adapter);
 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
 	adapter->watchdog_events++;
 	em_init_locked(adapter);
 }
 
 
 static void
 em_update_link_status(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	if_t ifp = adapter->ifp;
 	device_t dev = adapter->dev;
 	struct tx_ring *txr = adapter->tx_rings;
 	u32 link_check = 0;
 
 	/* Get the cached link value or read phy for real */
 	switch (hw->phy.media_type) {
 	case e1000_media_type_copper:
 		if (hw->mac.get_link_status) {
 			/* Do the work to read phy */
 			e1000_check_for_link(hw);
 			link_check = !hw->mac.get_link_status;
 			if (link_check) /* ESB2 fix */
 				e1000_cfg_on_link_up(hw);
 		} else
 			link_check = TRUE;
 		break;
 	case e1000_media_type_fiber:
 		e1000_check_for_link(hw);
 		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
                                  E1000_STATUS_LU);
 		break;
 	case e1000_media_type_internal_serdes:
 		e1000_check_for_link(hw);
 		link_check = adapter->hw.mac.serdes_has_link;
 		break;
 	default:
 	case e1000_media_type_unknown:
 		break;
 	}
 
 	/* Now check for a transition */
 	if (link_check && (adapter->link_active == 0)) {
 		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
 		    &adapter->link_duplex);
 		/* Check if we must disable SPEED_MODE bit on PCI-E */
 		if ((adapter->link_speed != SPEED_1000) &&
 		    ((hw->mac.type == e1000_82571) ||
 		    (hw->mac.type == e1000_82572))) {
 			int tarc0;
 			tarc0 = E1000_READ_REG(hw, E1000_TARC(0));
 			tarc0 &= ~TARC_SPEED_MODE_BIT;
 			E1000_WRITE_REG(hw, E1000_TARC(0), tarc0);
 		}
 		if (bootverbose)
 			device_printf(dev, "Link is up %d Mbps %s\n",
 			    adapter->link_speed,
 			    ((adapter->link_duplex == FULL_DUPLEX) ?
 			    "Full Duplex" : "Half Duplex"));
 		adapter->link_active = 1;
 		adapter->smartspeed = 0;
 		if_setbaudrate(ifp, adapter->link_speed * 1000000);
 		if_link_state_change(ifp, LINK_STATE_UP);
 	} else if (!link_check && (adapter->link_active == 1)) {
 		if_setbaudrate(ifp, 0);
 		adapter->link_speed = 0;
 		adapter->link_duplex = 0;
 		if (bootverbose)
 			device_printf(dev, "Link is Down\n");
 		adapter->link_active = 0;
 		/* Link down, disable hang detection */
 		for (int i = 0; i < adapter->num_queues; i++, txr++)
 			txr->busy = EM_TX_IDLE;
 		if_link_state_change(ifp, LINK_STATE_DOWN);
 	}
 }
 
 /*********************************************************************
  *
  *  Stop all traffic on the adapter: interrupts are disabled, the
  *  local timer is stopped, the interface is marked not running, TX
  *  hang detection is disarmed and a global reset is issued on the MAC.
  *
  *  The core lock must be held on entry (asserted); each ring's TX
  *  lock is taken here only while its hang state is cleared.
  *
  **********************************************************************/
 
 static void
 em_stop(void *arg)
 {
 	struct adapter	*adapter = arg;
 	struct e1000_hw	*hw = &adapter->hw;
 	if_t		 ifp = adapter->ifp;
 	int		 q;
 
 	EM_CORE_LOCK_ASSERT(adapter);
 
 	INIT_DEBUGOUT("em_stop: begin");
 
 	em_disable_intr(adapter);
 	callout_stop(&adapter->timer);
 
 	/* Tell the stack that the interface is no longer active */
 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
 
 	/* Disarm Hang Detection. */
 	for (q = 0; q < adapter->num_queues; q++) {
 		struct tx_ring *ring = &adapter->tx_rings[q];
 
 		EM_TX_LOCK(ring);
 		ring->busy = EM_TX_IDLE;
 		EM_TX_UNLOCK(ring);
 	}
 
 	e1000_reset_hw(hw);
 	E1000_WRITE_REG(hw, E1000_WUC, 0);
 
 	e1000_led_off(hw);
 	e1000_cleanup_led(hw);
 }
 
 
 /*********************************************************************
  *
  *  Determine hardware revision.
  *
  **********************************************************************/
 static void
 em_identify_hardware(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 
 	/* Make sure our PCI config space has the necessary stuff set */
 	pci_enable_busmaster(dev);
 	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
 
 	/* Save off the information about this board */
 	adapter->hw.vendor_id = pci_get_vendor(dev);
 	adapter->hw.device_id = pci_get_device(dev);
 	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
 	adapter->hw.subsystem_vendor_id =
 	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
 	adapter->hw.subsystem_device_id =
 	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
 
 	/* Do Shared Code Init and Setup */
 	if (e1000_set_mac_type(&adapter->hw)) {
 		device_printf(dev, "Setup init failure\n");
 		return;
 	}
 }
 
 /*
  * Map the device's register window (BAR 0) and wire it into the
  * shared-code hardware structure.
  *
  * Returns 0 on success, or ENXIO when the memory resource cannot be
  * allocated.
  */
 static int
 em_allocate_pci_resources(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	int		bar_rid = PCIR_BAR(0);
 
 	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 	    &bar_rid, RF_ACTIVE);
 	if (adapter->memory == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: memory\n");
 		return (ENXIO);
 	}
 
 	/* Record the bus-space tag/handle used for register access. */
 	adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->memory);
 	adapter->osdep.mem_bus_space_handle =
 	    rman_get_bushandle(adapter->memory);
 
 	adapter->hw.back = &adapter->osdep;
 	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
 
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Setup the Legacy or MSI Interrupt handler
  *
  **********************************************************************/
 int
 em_allocate_legacy(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	struct tx_ring	*txr = adapter->tx_rings;
 	int error, rid = 0;
 
 	/* Manually turn off all interrupts */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
 
 	if (adapter->msix == 1) /* using MSI */
 		rid = 1;
 	/* We allocate a single interrupt resource */
 	adapter->res = bus_alloc_resource_any(dev,
 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (adapter->res == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: "
 		    "interrupt\n");
 		return (ENXIO);
 	}
 
 	/*
 	 * Allocate a fast interrupt and the associated
 	 * deferred processing contexts.
 	 */
 	TASK_INIT(&adapter->que_task, 0, em_handle_que, adapter);
 	adapter->tq = taskqueue_create_fast("em_taskq", M_NOWAIT,
 	    taskqueue_thread_enqueue, &adapter->tq);
 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s que",
 	    device_get_nameunit(adapter->dev));
 	/* Use a TX only tasklet for local timer */
 	TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
 	txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
 	    taskqueue_thread_enqueue, &txr->tq);
 	taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq",
 	    device_get_nameunit(adapter->dev));
 	TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter);
 	if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET,
 	    em_irq_fast, NULL, adapter, &adapter->tag)) != 0) {
 		device_printf(dev, "Failed to register fast interrupt "
 			    "handler: %d\n", error);
 		taskqueue_free(adapter->tq);
 		adapter->tq = NULL;
 		return (error);
 	}
 	
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Setup the MSIX Interrupt handlers
  *   This is not really Multiqueue, rather
  *   it's just separate interrupt vectors
  *   for TX, RX, and Link.
  *
  *  Vectors are assigned in order: one per RX ring, then one per TX
  *  ring, then the link vector.  For every ring the IRQ resource is
  *  allocated (rid = vector + 1), the handler registered, the interrupt
  *  bound to a CPU (round-robin through em_last_bind_cpu) and a
  *  taskqueue created; the ring's IMS bit and IVAR entry are recorded
  *  in adapter->ims / adapter->ivars.
  *
  *  Returns 0 on success, ENXIO when an IRQ resource cannot be
  *  allocated, or the error from bus_setup_intr().  Ring resources
  *  acquired before a failure are left in place for the caller to
  *  release; the link IRQ is released here if its handler cannot be
  *  registered.
  *
  **********************************************************************/
 int
 em_allocate_msix(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	struct		tx_ring *txr = adapter->tx_rings;
 	struct		rx_ring *rxr = adapter->rx_rings;
 	int		error, rid, vector = 0;
 	int		cpu_id = 0;
 
 
 	/* Make sure all interrupts are disabled */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
 
 	/* First set up ring resources */
 	for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) {
 
 		/* RX ring */
 		rid = vector + 1;
 
 		rxr->res = bus_alloc_resource_any(dev,
 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
 		if (rxr->res == NULL) {
 			device_printf(dev,
 			    "Unable to allocate bus resource: "
 			    "RX MSIX Interrupt %d\n", i);
 			return (ENXIO);
 		}
 		if ((error = bus_setup_intr(dev, rxr->res,
 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_rx,
 		    rxr, &rxr->tag)) != 0) {
 			device_printf(dev, "Failed to register RX handler\n");
 			return (error);
 		}
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i);
 #endif
 		rxr->msix = vector;
 
 		if (em_last_bind_cpu < 0)
 			em_last_bind_cpu = CPU_FIRST();
 		cpu_id = em_last_bind_cpu;
 		bus_bind_intr(dev, rxr->res, cpu_id);
 
 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
 		    taskqueue_thread_enqueue, &rxr->tq);
 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)",
 		    device_get_nameunit(adapter->dev), cpu_id);
 		/*
 		** Set the bit to enable interrupt
 		** in E1000_IMS -- bits 20 and 21
 		** are for RX0 and RX1, note this has
 		** NOTHING to do with the MSIX vector
 		*/
 		rxr->ims = 1 << (20 + i);
 		adapter->ims |= rxr->ims;
 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
 
 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
 	}
 
 	for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) {
 		/* TX ring */
 		rid = vector + 1;
 		txr->res = bus_alloc_resource_any(dev,
 		    SYS_RES_IRQ, &rid, RF_ACTIVE);
 		if (txr->res == NULL) {
 			device_printf(dev,
 			    "Unable to allocate bus resource: "
 			    "TX MSIX Interrupt %d\n", i);
 			return (ENXIO);
 		}
 		if ((error = bus_setup_intr(dev, txr->res,
 		    INTR_TYPE_NET | INTR_MPSAFE, NULL, em_msix_tx,
 		    txr, &txr->tag)) != 0) {
 			device_printf(dev, "Failed to register TX handler\n");
 			return (error);
 		}
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i);
 #endif
 		txr->msix = vector;
 
 		if (em_last_bind_cpu < 0)
 			em_last_bind_cpu = CPU_FIRST();
 		cpu_id = em_last_bind_cpu;
 		bus_bind_intr(dev, txr->res, cpu_id);
 
 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
 		    taskqueue_thread_enqueue, &txr->tq);
 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)",
 		    device_get_nameunit(adapter->dev), cpu_id);
 		/*
 		** Set the bit to enable interrupt
 		** in E1000_IMS -- bits 22 and 23
 		** are for TX0 and TX1, note this has
 		** NOTHING to do with the MSIX vector
 		*/
 		txr->ims = 1 << (22 + i);
 		adapter->ims |= txr->ims;
 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
 
 		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
 	}
 
 	/* Link interrupt */
 	rid = vector + 1;
 	adapter->res = bus_alloc_resource_any(dev,
 	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (!adapter->res) {
 		device_printf(dev,"Unable to allocate "
 		    "bus resource: Link interrupt [%d]\n", rid);
 		return (ENXIO);
 	}
 	/* Set the link handler function */
 	error = bus_setup_intr(dev, adapter->res,
 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
 	    em_msix_link, adapter, &adapter->tag);
 	if (error) {
 		/*
 		 * Release the IRQ before forgetting it: linkvec is not
 		 * set yet, so the generic teardown could not compute the
 		 * right rid for it later.
 		 */
 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
 		adapter->res = NULL;
 		device_printf(dev, "Failed to register LINK handler\n");
 		return (error);
 	}
 #if __FreeBSD_version >= 800504
 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
 #endif
 	adapter->linkvec = vector;
 	adapter->ivars |=  (8 | vector) << 16;
 	adapter->ivars |= 0x80000000;
 
 	return (0);
 }
 
 
 /*
  * Release the interrupt and memory resources held by the adapter.
  *
  * For every queue the TX and RX interrupt handlers are torn down and
  * their IRQ resources released (rid = msix vector + 1).  Then the
  * link/legacy interrupt is torn down and released, MSI/MSI-X is
  * released when it was in use, and finally the MSI-X BAR, the register
  * BAR and the flash mapping are released if they were allocated.
  *
  * Safe to call after a partial attach: a missing ring array ends the
  * per-queue loop, and every other resource is checked for NULL first.
  */
 static void
 em_free_pci_resources(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	struct tx_ring	*txr;
 	struct rx_ring	*rxr;
 	int		rid;
 
 
 	/*
 	** Release all the queue interrupt resources:
 	*/
 	for (int i = 0; i < adapter->num_queues; i++) {
 		/* an early abort? (test the array itself, not &array[i]) */
 		if (adapter->tx_rings == NULL)
 			break;
 		txr = &adapter->tx_rings[i];
 		rid = txr->msix + 1;
 		if (txr->tag != NULL) {
 			bus_teardown_intr(dev, txr->res, txr->tag);
 			txr->tag = NULL;
 		}
 		if (txr->res != NULL)
 			bus_release_resource(dev, SYS_RES_IRQ,
 			    rid, txr->res);
 
 		/* an early abort? */
 		if (adapter->rx_rings == NULL)
 			break;
 		rxr = &adapter->rx_rings[i];
 		rid = rxr->msix + 1;
 		if (rxr->tag != NULL) {
 			bus_teardown_intr(dev, rxr->res, rxr->tag);
 			rxr->tag = NULL;
 		}
 		if (rxr->res != NULL)
 			bus_release_resource(dev, SYS_RES_IRQ,
 			    rid, rxr->res);
 	}
 
 	/* Work out the rid the link/legacy interrupt was allocated with. */
 	if (adapter->linkvec) /* we are doing MSIX */
 		rid = adapter->linkvec + 1;
 	else
 		rid = (adapter->msix != 0) ? 1 : 0;
 
 	if (adapter->tag != NULL) {
 		bus_teardown_intr(dev, adapter->res, adapter->tag);
 		adapter->tag = NULL;
 	}
 
 	if (adapter->res != NULL)
 		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
 
 
 	if (adapter->msix)
 		pci_release_msi(dev);
 
 	if (adapter->msix_mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
 
 	if (adapter->memory != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(0), adapter->memory);
 
 	if (adapter->flash != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    EM_FLASH, adapter->flash);
 }
 
 /*
  * Setup MSI or MSI/X
  *
  * Chooses the interrupt scheme for the adapter and sets
  * adapter->num_queues.  MSI-X is only attempted when the MAC type is
  * e1000_82574 and em_enable_msix is TRUE; in every other case, or when
  * the MSI-X attempt is abandoned, a single MSI message is requested.
  *
  * Returns the count left in 'val' by pci_alloc_msix() or pci_alloc_msi()
  * when one of them succeeds, and 0 when neither does (legacy IRQ).
  */
 static int
 em_setup_msix(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	int val;
 
 	/* Nearly always going to use one queue */
 	adapter->num_queues = 1;
 
 	/*
 	** Try using MSI-X for Hartwell adapters
 	*/
 	if ((adapter->hw.mac.type == e1000_82574) &&
 	    (em_enable_msix == TRUE)) {
 #ifdef EM_MULTIQUEUE
 		/* Any em_num_queues value other than 1 selects two queues */
 		adapter->num_queues = (em_num_queues == 1) ? 1 : 2;
 		if (adapter->num_queues > 1)
 			em_enable_vectors_82574(adapter);
 #endif
 		/* Map the MSIX BAR */
 		int rid = PCIR_BAR(EM_MSIX_BAR);
 		adapter->msix_mem = bus_alloc_resource_any(dev,
 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
        		if (adapter->msix_mem == NULL) {
 			/* May not be enabled */
                		device_printf(adapter->dev,
 			    "Unable to map MSIX table \n");
 			goto msi;
        		}
 		/* Number of MSI-X messages the device reports */
 		val = pci_msix_count(dev); 
 
 #ifdef EM_MULTIQUEUE
 		/* We need 5 vectors in the multiqueue case */
 		if (adapter->num_queues > 1 ) {
 			if (val >= 5)
 				val = 5;
 			else {
 				/*
 				 * Not enough vectors for two queues: drop
 				 * back to one queue and reuse the single
 				 * queue check below (the label sits inside
 				 * the else body).
 				 */
 				adapter->num_queues = 1;
 				device_printf(adapter->dev,
 				    "Insufficient MSIX vectors for >1 queue, "
 				    "using single queue...\n");
 				goto msix_one;
 			}
 		} else {
 msix_one:
 #endif
 			/* Single queue case asks for 3 vectors */
 			if (val >= 3)
 				val = 3;
 			else {
 				device_printf(adapter->dev,
 			    	"Insufficient MSIX vectors, using MSI\n");
 				goto msi;
 			}
 #ifdef EM_MULTIQUEUE
 		}
 #endif
 
 		/*
 		 * NOTE(review): on success the count left in 'val' is
 		 * returned as is; it is not compared with the count that
 		 * was requested, although the comment below mentions the
 		 * "less than needed" case -- confirm callers cope.
 		 */
 		if ((pci_alloc_msix(dev, &val) == 0)) {
 			device_printf(adapter->dev,
 			    "Using MSIX interrupts "
 			    "with %d vectors\n", val);
 			return (val);
 		}
 
 		/*
 		** If MSIX alloc failed or provided us with
 		** less than needed, free and fall through to MSI
 		*/
 		pci_release_msi(dev);
 	}
 msi:
 	/* The MSIX table mapping is of no use for MSI or legacy, drop it */
 	if (adapter->msix_mem != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(EM_MSIX_BAR), adapter->msix_mem);
 		adapter->msix_mem = NULL;
 	}
 	/* Ask for exactly one MSI message */
        	val = 1;
        	if (pci_alloc_msi(dev, &val) == 0) {
                	device_printf(adapter->dev, "Using an MSI interrupt\n");
 		return (val);
 	} 
 	/* Should only happen due to manual configuration */
 	device_printf(adapter->dev,"No MSI/MSIX using a Legacy IRQ\n");
 	return (0);
 }
 
 
 /*********************************************************************
  *
  *  Bring the hardware into the configuration described by the
  *  adapter structure: size the packet buffer, set up the flow
  *  control parameters, then reset and re-initialize the MAC.
  *
  **********************************************************************/
 static void
 em_reset(struct adapter *adapter)
 {
 	struct e1000_hw	*hw = &adapter->hw;
 	device_t	dev = adapter->dev;
 	if_t		ifp = adapter->ifp;
 	u32		pba;
 	u16		rx_bytes;
 
 	INIT_DEBUGOUT("em_reset: begin");
 
 	/*
 	 * 82571/82572 with em_smart_pwr_down unset: clear the smart
 	 * power down bit in the PHY, which speeds up time to link.
 	 */
 	if (!em_smart_pwr_down &&
 	    (hw->mac.type == e1000_82571 || hw->mac.type == e1000_82572)) {
 		u16 pm_reg = 0;
 
 		e1000_read_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, &pm_reg);
 		pm_reg &= ~IGP02E1000_PM_SPD;
 		e1000_write_phy_reg(hw, IGP02E1000_PHY_POWER_MGMT, pm_reg);
 	}
 
 	/*
 	 * Packet Buffer Allocation (PBA): the value written is the
 	 * receive share of the packet buffer, whatever is left over
 	 * serves as the transmit buffer.
 	 */
 	switch (hw->mac.type) {
 	case e1000_82571:
 	case e1000_82572:
 	case e1000_80003es2lan:
 		/* 48K total: 32K for Rx, 16K for Tx */
 		pba = E1000_PBA_32K;
 		break;
 	case e1000_82573:
 		/* 32K total: 12K for Rx, 20K for Tx */
 		pba = E1000_PBA_12K;
 		break;
 	case e1000_82574:
 	case e1000_82583:
 		/* 20K for Rx, 20K for Tx */
 		pba = E1000_PBA_20K;
 		break;
 	case e1000_ich8lan:
 		pba = E1000_PBA_8K;
 		break;
 	case e1000_ich9lan:
 	case e1000_ich10lan:
 		/* Larger receive share once jumbo frames are in use */
 		pba = (hw->mac.max_frame_size > 4096) ?
 		    E1000_PBA_14K : E1000_PBA_10K;
 		break;
 	case e1000_pchlan:
 	case e1000_pch2lan:
 	case e1000_pch_lpt:
 		pba = E1000_PBA_26K;
 		break;
 	default:
 		/* 40K Rx / 24K Tx for big frames, else 48K Rx / 16K Tx */
 		pba = (hw->mac.max_frame_size > 8192) ?
 		    E1000_PBA_40K : E1000_PBA_48K;
 		break;
 	}
 	E1000_WRITE_REG(hw, E1000_PBA, pba);
 
 	/*
 	 * Flow control (generation and handling of Ethernet PAUSE
 	 * frames):
 	 * - The high water mark leaves room for at least two frames
 	 *   to arrive after an XOFF has been sent.
 	 * - The low water mark is kept close to the high water mark
 	 *   so the receiver can send XON as soon as it has drained a
 	 *   little.  The arbitrary distance of 1500 restarts traffic
 	 *   once a full frame worth of data has left the buffer;
 	 *   several small frames only trigger XON when together they
 	 *   free 1500 bytes.
 	 * - The pause time is fairly large: 1000 x 512ns = 512 usec.
 	 */
 	rx_bytes = ((E1000_READ_REG(hw, E1000_PBA) & 0xffff) << 10 );
 	hw->fc.high_water = rx_bytes -
 	    roundup2(hw->mac.max_frame_size, 1024);
 	hw->fc.low_water = hw->fc.high_water - 1500;
 
 	/* A locally configured flow control mode wins over the default */
 	hw->fc.requested_mode = adapter->fc ? adapter->fc : e1000_fc_full;
 	hw->fc.pause_time = (hw->mac.type == e1000_80003es2lan) ?
 	    0xFFFF : EM_FC_PAUSE_TIME;
 	hw->fc.send_xon = TRUE;
 
 	/* Per-device overrides of the values computed above */
 	switch (hw->mac.type) {
 	case e1000_pchlan:
 		/* Workaround: no TX flow ctrl for PCH */
 		hw->fc.requested_mode = e1000_fc_rx_pause;
 		hw->fc.pause_time = 0xFFFF; /* override */
 		if (if_getmtu(ifp) > ETHERMTU) {
 			hw->fc.high_water = 0x3500;
 			hw->fc.low_water = 0x1500;
 		} else {
 			hw->fc.high_water = 0x5000;
 			hw->fc.low_water = 0x3000;
 		}
 		hw->fc.refresh_time = 0x1000;
 		break;
 	case e1000_pch2lan:
 	case e1000_pch_lpt:
 		hw->fc.high_water = 0x5C20;
 		hw->fc.low_water = 0x5048;
 		hw->fc.pause_time = 0x0650;
 		hw->fc.refresh_time = 0x0400;
 		/* Jumbos need adjusted PBA */
 		E1000_WRITE_REG(hw, E1000_PBA,
 		    (if_getmtu(ifp) > ETHERMTU) ? 12 : 26);
 		break;
 	case e1000_ich9lan:
 	case e1000_ich10lan:
 		/* Only jumbo MTUs get special marks on these parts */
 		if (if_getmtu(ifp) > ETHERMTU) {
 			hw->fc.high_water = 0x2800;
 			hw->fc.low_water = hw->fc.high_water - 8;
 		}
 		break;
 	default:
 		if (hw->mac.type == e1000_80003es2lan)
 			hw->fc.pause_time = 0xFFFF;
 		break;
 	}
 
 	/* Global reset, clear wakeup control, then re-init */
 	e1000_reset_hw(hw);
 	E1000_WRITE_REG(hw, E1000_WUC, 0);
 	em_disable_aspm(adapter);
 	if (e1000_init_hw(hw) < 0) {
 		device_printf(dev, "Hardware Initialization Failed\n");
 		return;
 	}
 
 	E1000_WRITE_REG(hw, E1000_VET, ETHERTYPE_VLAN);
 	e1000_get_phy_info(hw);
 	e1000_check_for_link(hw);
 }
 
 /*********************************************************************
  *
  *  Create the network interface for this adapter, fill in its
  *  callbacks, capabilities and media list, and attach it.
  *
  *  Returns 0 on success and -1 when no interface handle could be
  *  obtained.
  *
  **********************************************************************/
 static int
 em_setup_interface(device_t dev, struct adapter *adapter)
 {
 	struct ifmedia	*media = &adapter->media;
 	if_t		ifp;
 
 	INIT_DEBUGOUT("em_setup_interface: begin");
 
 	ifp = if_gethandle(IFT_ETHER);
 	adapter->ifp = ifp;
 	if (ifp == NULL) {
 		device_printf(dev, "can not allocate ifnet structure\n");
 		return (-1);
 	}
 
 	/* Identity and entry points */
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	if_setdev(ifp, dev);
 	if_setinitfn(ifp, em_init);
 	if_setsoftc(ifp, adapter);
 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
 	if_setioctlfn(ifp, em_ioctl);
 	if_setgetcounterfn(ifp, em_get_counter);
 
 	/* TSO limits */
 	ifp->if_hw_tsomax = IP_MAXPACKET;
 	ifp->if_hw_tsomaxsegcount = EM_MAX_SCATTER;
 	ifp->if_hw_tsomaxsegsize = EM_TSO_SEG_SIZE;
 
 #ifdef EM_MULTIQUEUE
 	/* Multiqueue: the stack hands packets over via if_transmit */
 	if_settransmitfn(ifp, em_mq_start);
 	if_setqflushfn(ifp, em_qflush);
 #else
 	/* Single queue: classic if_start with a send queue */
 	if_setstartfn(ifp, em_start);
 	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
 	if_setsendqready(ifp);
 #endif
 
 	ether_ifattach(ifp, adapter->hw.mac.addr);
 
 	/* Start from a clean slate, then add what is supported */
 	if_setcapabilities(ifp, 0);
 	if_setcapenable(ifp, 0);
 
 	if_setcapabilitiesbit(ifp,
 	    IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM | IFCAP_TSO4, 0);
 
 	/* Advertise full VLAN support to the upper layers */
 	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
 	if_setcapabilitiesbit(ifp,
 	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU, 0);
 
 	/* Everything advertised so far is also switched on */
 	if_setcapenable(ifp, if_getcapabilities(ifp));
 
 	/*
 	 * Hardware VLAN filtering is advertised but left disabled.
 	 * When vlans are created on another pseudo device (eg. lagg)
 	 * the vlan events are not passed through, which breaks
 	 * operation unless HW FILTER is off.  With vlans directly on
 	 * the em driver it can be enabled for full hardware tag
 	 * filtering.
 	 */
 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER,0);
 
 #ifdef DEVICE_POLLING
 	if_setcapabilitiesbit(ifp, IFCAP_POLLING,0);
 #endif
 
 	/* With WOL available, enable only WOL MAGIC by default */
 	if (adapter->wol) {
 		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
 		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
 	}
 
 	/*
 	 * Register the media callbacks and list the media types this
 	 * adapter supports; autoselect is always offered and is the
 	 * default.
 	 */
 	ifmedia_init(media, IFM_IMASK, em_media_change, em_media_status);
 	switch (adapter->hw.phy.media_type) {
 	case e1000_media_type_fiber:
 	case e1000_media_type_internal_serdes:
 		/* 1000_SX is the default fiber type */
 		ifmedia_add(media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
 		    0, NULL);
 		ifmedia_add(media, IFM_ETHER | IFM_1000_SX, 0, NULL);
 		break;
 	default:
 		ifmedia_add(media, IFM_ETHER | IFM_10_T, 0, NULL);
 		ifmedia_add(media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL);
 		ifmedia_add(media, IFM_ETHER | IFM_100_TX, 0, NULL);
 		ifmedia_add(media, IFM_ETHER | IFM_100_TX | IFM_FDX,
 		    0, NULL);
 		/* No gigabit entries for the e1000_phy_ife PHY type */
 		if (adapter->hw.phy.type != e1000_phy_ife) {
 			ifmedia_add(media,
 			    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
 			ifmedia_add(media, IFM_ETHER | IFM_1000_T, 0, NULL);
 		}
 		break;
 	}
 	ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(media, IFM_ETHER | IFM_AUTO);
 
 	return (0);
 }
 
 
 /*
  * Manage DMA'able memory.
  */
 /*
  * bus_dmamap_load() callback: on success store the bus address of the
  * first segment in the bus_addr_t that 'arg' points to; on error leave
  * it untouched.
  */
 static void
 em_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
 {
 	bus_addr_t *paddr = arg;
 
 	if (error == 0)
 		*paddr = segs[0].ds_addr;
 }
 
 /*
  * Allocate 'size' bytes of DMA memory as one segment and load it.
  *
  * A DMA tag is created, memory is allocated against it and the memory
  * is loaded so that its bus address ends up in dma->dma_paddr (filled
  * in by em_dmamap_cb).  'mapflags' is passed to bus_dmamap_load()
  * together with BUS_DMA_NOWAIT.
  *
  * Returns 0 on success.  On failure everything acquired so far is
  * released, dma->dma_tag is set to NULL and a non-zero error is
  * returned.
  */
 static int
 em_dma_malloc(struct adapter *adapter, bus_size_t size,
     struct em_dma_alloc *dma, int mapflags)
 {
 	int error;
 
 	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
 				EM_DBA_ALIGN, 0,	/* alignment, bounds */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR,	/* highaddr */
 				NULL, NULL,		/* filter, filterarg */
 				size,			/* maxsize */
 				1,			/* nsegments */
 				size,			/* maxsegsize */
 				0,			/* flags */
 				NULL,			/* lockfunc */
 				NULL,			/* lockarg */
 				&dma->dma_tag);
 	if (error) {
 		device_printf(adapter->dev,
 		    "%s: bus_dma_tag_create failed: %d\n",
 		    __func__, error);
 		goto fail_tag;
 	}
 
 	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
 	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
 	if (error) {
 		device_printf(adapter->dev,
 		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
 		    __func__, (uintmax_t)size, error);
 		/* Nothing was allocated: only the tag has to go. */
 		goto fail_mem;
 	}
 
 	dma->dma_paddr = 0;
 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
 	    size, em_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
 	if (error || dma->dma_paddr == 0) {
 		/*
 		 * A load that reports success but left no bus address
 		 * behind is a failure too; make sure the caller sees a
 		 * non-zero return in that case.
 		 */
 		if (error == 0)
 			error = ENOMEM;
 		device_printf(adapter->dev,
 		    "%s: bus_dmamap_load failed: %d\n",
 		    __func__, error);
 		goto fail_load;
 	}
 
 	return (0);
 
 fail_load:
 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
 fail_mem:
 	bus_dma_tag_destroy(dma->dma_tag);
 fail_tag:
 	dma->dma_tag = NULL;
 
 	return (error);
 }
 
 static void
 em_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
 {
 	if (dma->dma_tag == NULL)
 		return;
 	if (dma->dma_paddr != 0) {
 		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
 		dma->dma_paddr = 0;
 	}
 	if (dma->dma_vaddr != NULL) {
 		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
 		dma->dma_vaddr = NULL;
 	}
 	bus_dma_tag_destroy(dma->dma_tag);
 	dma->dma_tag = NULL;
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for the transmit and receive rings, and then
  *  the descriptors associated with each, called only once at attach.
  *
  *  Returns 0 on success.  On failure the descriptor memory and buf
  *  rings of the fully configured rings and both ring arrays are
  *  released, adapter->tx_rings / adapter->rx_rings are reset to NULL
  *  when freed, and ENOMEM is returned.
  *
  **********************************************************************/
 static int
 em_allocate_queues(struct adapter *adapter)
 {
 	device_t		dev = adapter->dev;
 	struct tx_ring		*txr = NULL;
 	struct rx_ring		*rxr = NULL;
 	int rsize, tsize, error = E1000_SUCCESS;
 	int txconf = 0, rxconf = 0;
 
 
 	/* Allocate the TX ring struct memory (zeroed, so txr->br is NULL) */
 	if (!(adapter->tx_rings =
 	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate TX ring memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
 	/* Now allocate the RX */
 	if (!(adapter->rx_rings =
 	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate RX ring memory\n");
 		error = ENOMEM;
 		goto rx_fail;
 	}
 
 	tsize = roundup2(adapter->num_tx_desc *
 	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
 	/*
 	 * Now set up the TX queues, txconf is needed to handle the
 	 * possibility that things fail midcourse and we need to
 	 * undo memory gracefully.  It counts the rings that completed
 	 * the whole loop body.
 	 */ 
 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
 		/* Set up some basics */
 		txr = &adapter->tx_rings[i];
 		txr->adapter = adapter;
 		txr->me = i;
 
 		/* Initialize the TX lock */
 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
 		    device_get_nameunit(dev), txr->me);
 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
 
 		if (em_dma_malloc(adapter, tsize,
 			&txr->txdma, BUS_DMA_NOWAIT)) {
 			device_printf(dev,
 			    "Unable to allocate TX Descriptor memory\n");
 			error = ENOMEM;
 			goto err_tx_desc;
 		}
 		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
 		bzero((void *)txr->tx_base, tsize);
 
 		/*
 		 * NOTE(review): on failure em_allocate_transmit_buffers()
 		 * calls em_free_transmit_structures(), which frees
 		 * adapter->tx_rings; the unwind below then still walks
 		 * that array -- needs a coordinated fix in both places.
 		 */
         	if (em_allocate_transmit_buffers(txr)) {
 			device_printf(dev,
 			    "Critical Failure setting up transmit buffers\n");
 			error = ENOMEM;
 			goto err_tx_desc;
         	}
 #if __FreeBSD_version >= 800000
 		/* Allocate a buf ring */
 		txr->br = buf_ring_alloc(4096, M_DEVBUF,
 		    M_WAITOK, &txr->tx_mtx);
 #endif
 	}
 
 	/*
 	 * Next the RX queues...
 	 */ 
 	rsize = roundup2(adapter->num_rx_desc *
-	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
+	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
 		rxr = &adapter->rx_rings[i];
 		rxr->adapter = adapter;
 		rxr->me = i;
 
 		/* Initialize the RX lock, named after this RX ring's index */
 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
 		    device_get_nameunit(dev), rxr->me);
 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
 
 		if (em_dma_malloc(adapter, rsize,
 			&rxr->rxdma, BUS_DMA_NOWAIT)) {
 			device_printf(dev,
 			    "Unable to allocate RxDescriptor memory\n");
 			error = ENOMEM;
 			goto err_rx_desc;
 		}
-		rxr->rx_base = (struct e1000_rx_desc *)rxr->rxdma.dma_vaddr;
+		rxr->rx_base = (union e1000_rx_desc_extended *)rxr->rxdma.dma_vaddr;
 		bzero((void *)rxr->rx_base, rsize);
 
         	/* Allocate receive buffers for the ring*/
 		if (em_allocate_receive_buffers(rxr)) {
 			device_printf(dev,
 			    "Critical Failure setting up receive buffers\n");
 			error = ENOMEM;
 			goto err_rx_desc;
 		}
 	}
 
 	return (0);
 
 err_rx_desc:
 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
 		em_dma_free(adapter, &rxr->rxdma);
 err_tx_desc:
 	/*
 	 * Release what each fully configured TX ring owns.  The buf
 	 * ring is freed here, per ring, rather than through whatever
 	 * 'txr' happens to point at when the unwind starts.
 	 */
 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--) {
 #if __FreeBSD_version >= 800000
 		if (txr->br != NULL) {
 			buf_ring_free(txr->br, M_DEVBUF);
 			txr->br = NULL;
 		}
 #endif
 		em_dma_free(adapter, &txr->txdma);
 	}
 	free(adapter->rx_rings, M_DEVBUF);
 	adapter->rx_rings = NULL;
 rx_fail:
 	/*
 	 * Reached directly when the RX array could not be allocated; no
 	 * TX ring has been touched yet, so only the array is released.
 	 * The pointers are cleared so later teardown code that tests
 	 * them does not see stale addresses.
 	 */
 	free(adapter->tx_rings, M_DEVBUF);
 	adapter->tx_rings = NULL;
 fail:
 	return (error);
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for tx_buffer structures. The tx_buffer stores all
  *  the information needed to transmit a packet on the wire. This is
  *  called only once at attach, setup is done every reset.
  *
  *  Creates the DMA tag for transmit buffers (txr->txtag), the
  *  tx_buffers array with one entry per TX descriptor, and one DMA map
  *  per entry.
  *
  *  Returns 0 on success; otherwise the error from the failing step
  *  (ENOMEM when the array cannot be allocated).
  *
  **********************************************************************/
 static int
 em_allocate_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
 	device_t dev = adapter->dev;
-	struct em_buffer *txbuf;
+	struct em_txbuffer *txbuf;
 	int error, i;
 
 	/*
 	 * Setup DMA descriptor areas.
 	 */
 	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
 			       1, 0,			/* alignment, bounds */
 			       BUS_SPACE_MAXADDR,	/* lowaddr */
 			       BUS_SPACE_MAXADDR,	/* highaddr */
 			       NULL, NULL,		/* filter, filterarg */
 			       EM_TSO_SIZE,		/* maxsize */
 			       EM_MAX_SCATTER,		/* nsegments */
 			       PAGE_SIZE,		/* maxsegsize */
 			       0,			/* flags */
 			       NULL,			/* lockfunc */
 			       NULL,			/* lockfuncarg */
 			       &txr->txtag))) {
 		device_printf(dev,"Unable to allocate TX DMA tag\n");
 		goto fail;
 	}
 
 	/* One zeroed bookkeeping entry per TX descriptor */
 	if (!(txr->tx_buffers =
-	    (struct em_buffer *) malloc(sizeof(struct em_buffer) *
+	    (struct em_txbuffer *) malloc(sizeof(struct em_txbuffer) *
 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
 		error = ENOMEM;
 		goto fail;
 	}
 
         /* Create the descriptor buffer dma maps */
 	txbuf = txr->tx_buffers;
 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
 		if (error != 0) {
 			device_printf(dev, "Unable to create TX DMA map\n");
 			goto fail;
 		}
 	}
 
 	return 0;
 fail:
 	/* We free all, it handles case where we are in the middle */
 	/*
 	 * NOTE(review): this releases every TX ring of the adapter and
 	 * the adapter->tx_rings array itself, not only 'txr', while the
 	 * caller em_allocate_queues() keeps using that array in its own
 	 * error path -- verify this cannot touch freed memory.
 	 */
 	em_free_transmit_structures(adapter);
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Initialize a transmit ring.
  *
  *  Under the TX lock: zeroes the descriptor area, resets the ring
  *  indices, frees any mbufs still attached to tx buffers, marks the
  *  whole ring as available and clears the remembered checksum
  *  offload context.
  *
  **********************************************************************/
 static void
 em_setup_transmit_ring(struct tx_ring *txr)
 {
 	struct adapter *adapter = txr->adapter;
-	struct em_buffer *txbuf;
+	struct em_txbuffer *txbuf;
 	int i;
 #ifdef DEV_NETMAP
 	struct netmap_slot *slot;
 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
 #endif /* DEV_NETMAP */
 
 	/* Clear the old descriptor contents */
 	EM_TX_LOCK(txr);
 #ifdef DEV_NETMAP
 	/* presumably non-NULL only while in netmap mode -- TODO confirm */
 	slot = netmap_reset(na, NR_TX, txr->me, 0);
 #endif /* DEV_NETMAP */
 
 	bzero((void *)txr->tx_base,
 	      (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
 	/* Reset indices */
 	txr->next_avail_desc = 0;
 	txr->next_to_clean = 0;
 
 	/* Free any existing tx buffers. */
         txbuf = txr->tx_buffers;
 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
 		if (txbuf->m_head != NULL) {
 			/* Finish the DMA, drop the mapping, then the mbuf */
 			bus_dmamap_sync(txr->txtag, txbuf->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag, txbuf->map);
 			m_freem(txbuf->m_head);
 			txbuf->m_head = NULL;
 		}
 #ifdef DEV_NETMAP
 		if (slot) {
 			/* Point descriptor i at the matching netmap buffer */
 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
 			uint64_t paddr;
 			void *addr;
 
 			addr = PNMB(na, slot + si, &paddr);
 			txr->tx_base[i].buffer_addr = htole64(paddr);
 			/* reload the map for netmap mode */
 			netmap_load_map(na, txr->txtag, txbuf->map, addr);
 		}
 #endif /* DEV_NETMAP */
 
 		/* clear the watch index */
 		txbuf->next_eop = -1;
         }
 
 	/* Set number of descriptors available */
 	txr->tx_avail = adapter->num_tx_desc;
 	txr->busy = EM_TX_IDLE;
 
 	/* Clear checksum offload context. */
 	txr->last_hw_offload = 0;
 	txr->last_hw_ipcss = 0;
 	txr->last_hw_ipcso = 0;
 	txr->last_hw_tucss = 0;
 	txr->last_hw_tucso = 0;
 
 	/* Sync the freshly cleared descriptor area before it is used */
 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	EM_TX_UNLOCK(txr);
 }
 
 /*********************************************************************
  *
  *  Run em_setup_transmit_ring() on every transmit ring of the
  *  adapter, in queue order.
  *
  **********************************************************************/
 static void
 em_setup_transmit_structures(struct adapter *adapter)
 {
 	for (int q = 0; q < adapter->num_queues; q++)
 		em_setup_transmit_ring(&adapter->tx_rings[q]);
 }
 
 /*********************************************************************
  *
  *  Enable transmit unit.
  *
  **********************************************************************/
 static void
 em_initialize_transmit_unit(struct adapter *adapter)
 {
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct e1000_hw	*hw = &adapter->hw;
 	u32	tctl, txdctl = 0, tarc, tipg = 0;
 
 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
 
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		u64 bus_addr = txr->txdma.dma_paddr;
 		/* Base and Len of TX Ring */
 		E1000_WRITE_REG(hw, E1000_TDLEN(i),
 	    	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
 		E1000_WRITE_REG(hw, E1000_TDBAH(i),
 	    	    (u32)(bus_addr >> 32));
 		E1000_WRITE_REG(hw, E1000_TDBAL(i),
 	    	    (u32)bus_addr);
 		/* Init the HEAD/TAIL indices */
 		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
 		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
 
 		HW_DEBUGOUT2("Base = %x, Length = %x\n",
 		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
 
 		txr->busy = EM_TX_IDLE;
 		txdctl = 0; /* clear txdctl */
                 txdctl |= 0x1f; /* PTHRESH */
                 txdctl |= 1 << 8; /* HTHRESH */
                 txdctl |= 1 << 16;/* WTHRESH */
 		txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */
 		txdctl |= E1000_TXDCTL_GRAN;
                 txdctl |= 1 << 25; /* LWTHRESH */
 
                 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
 	}
 
 	/* Set the default values for the Tx Inter Packet Gap timer */
 	switch (adapter->hw.mac.type) {
 	case e1000_80003es2lan:
 		tipg = DEFAULT_82543_TIPG_IPGR1;
 		tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
 		    E1000_TIPG_IPGR2_SHIFT;
 		break;
 	default:
 		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
 		    (adapter->hw.phy.media_type ==
 		    e1000_media_type_internal_serdes))
 			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
 		else
 			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
 		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
 		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
 	}
 
 	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
 	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
 
 	if(adapter->hw.mac.type >= e1000_82540)
 		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
 		    adapter->tx_abs_int_delay.value);
 
 	if ((adapter->hw.mac.type == e1000_82571) ||
 	    (adapter->hw.mac.type == e1000_82572)) {
 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
 		tarc |= TARC_SPEED_MODE_BIT;
 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
 	} else if (adapter->hw.mac.type == e1000_80003es2lan) {
 		/* errata: program both queues to unweighted RR */
 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
 		tarc |= 1;
 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1));
 		tarc |= 1;
 		E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
 	} else if (adapter->hw.mac.type == e1000_82574) {
 		tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0));
 		tarc |= TARC_ERRATA_BIT;
 		if ( adapter->num_queues > 1) {
 			tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX);
 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc);
 		} else
 			E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc);
 	}
 
 	adapter->txd_cmd = E1000_TXD_CMD_IFCS;
 	if (adapter->tx_int_delay.value > 0)
 		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
 
 	/* Program the Transmit Control Register */
 	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
 	tctl &= ~E1000_TCTL_CT;
 	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
 		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
 
 	if (adapter->hw.mac.type >= e1000_82571)
 		tctl |= E1000_TCTL_MULR;
 
 	/* This write will effectively turn on the transmit unit. */
 	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
 
 }
 
 
 /*********************************************************************
  *
  *  Tear down every transmit ring (buffers, descriptor memory and
  *  lock) and release the ring array itself.
  *
  **********************************************************************/
 static void
 em_free_transmit_structures(struct adapter *adapter)
 {
 	for (int q = 0; q < adapter->num_queues; q++) {
 		struct tx_ring *ring = &adapter->tx_rings[q];
 
 		/* Buffers and descriptors go while the ring is locked */
 		EM_TX_LOCK(ring);
 		em_free_transmit_buffers(ring);
 		em_dma_free(adapter, &ring->txdma);
 		EM_TX_UNLOCK(ring);
 
 		EM_TX_LOCK_DESTROY(ring);
 	}
 
 	free(adapter->tx_rings, M_DEVBUF);
 }
 
 /*********************************************************************
  *
  *  Free transmit ring related data structures.
  *
  *  For every tx buffer: frees a still attached mbuf and destroys the
  *  DMA map.  Afterwards frees the buf ring, the tx_buffers array and
  *  the transmit DMA tag.  Returns at once when the ring has no
  *  tx_buffers array.
  *
  **********************************************************************/
 static void
 em_free_transmit_buffers(struct tx_ring *txr)
 {
 	struct adapter		*adapter = txr->adapter;
-	struct em_buffer	*txbuf;
+	struct em_txbuffer	*txbuf;
 
 	INIT_DEBUGOUT("free_transmit_ring: begin");
 
 	/* Nothing was allocated for this ring (or it is already gone) */
 	if (txr->tx_buffers == NULL)
 		return;
 
 	for (int i = 0; i < adapter->num_tx_desc; i++) {
 		txbuf = &txr->tx_buffers[i];
 		if (txbuf->m_head != NULL) {
 			/* Packet still attached: sync, unload, free it */
 			bus_dmamap_sync(txr->txtag, txbuf->map,
 			    BUS_DMASYNC_POSTWRITE);
 			bus_dmamap_unload(txr->txtag,
 			    txbuf->map);
 			m_freem(txbuf->m_head);
 			txbuf->m_head = NULL;
 			if (txbuf->map != NULL) {
 				bus_dmamap_destroy(txr->txtag,
 				    txbuf->map);
 				txbuf->map = NULL;
 			}
 		} else if (txbuf->map != NULL) {
 			/* No packet, only the map has to be destroyed */
 			bus_dmamap_unload(txr->txtag,
 			    txbuf->map);
 			bus_dmamap_destroy(txr->txtag,
 			    txbuf->map);
 			txbuf->map = NULL;
 		}
 	}
 #if __FreeBSD_version >= 800000
 	/*
 	 * NOTE(review): txr->br is not reset to NULL after the free,
 	 * unlike tx_buffers and txtag below -- confirm nothing looks at
 	 * it afterwards.
 	 */
 	if (txr->br != NULL)
 		buf_ring_free(txr->br, M_DEVBUF);
 #endif
 	if (txr->tx_buffers != NULL) {
 		free(txr->tx_buffers, M_DEVBUF);
 		txr->tx_buffers = NULL;
 	}
 	if (txr->txtag != NULL) {
 		bus_dma_tag_destroy(txr->txtag);
 		txr->txtag = NULL;
 	}
 	return;
 }
 
 
 /*********************************************************************
  *  The offload context is protocol specific (TCP/UDP) and thus
  *  only needs to be set when the protocol changes. The occasion
  *  of a context change can be a performance detriment, and
  *  might be better just disabled. The reason arises in the way
  *  in which the controller supports pipelined requests from the
  *  Tx data DMA. Up to four requests can be pipelined, and they may
  *  belong to the same packet or to multiple packets. However all
  *  requests for one packet are issued before a request is issued
  *  for a subsequent packet and if a request for the next packet
  *  requires a context change, that request will be stalled
  *  until the previous request completes. This means setting up
  *  a new context effectively disables pipelined Tx data DMA which
  *  in turn greatly slow down performance to send small sized
  *  frames. 
  **********************************************************************/
 static void
 em_transmit_checksum_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
     struct ip *ip, u32 *txd_upper, u32 *txd_lower)
 {
 	struct adapter			*adapter = txr->adapter;
 	struct e1000_context_desc	*TXD = NULL;
-	struct em_buffer		*tx_buffer;
+	struct em_txbuffer		*tx_buffer;
 	int				cur, hdr_len;
 	u32				cmd = 0;
 	u16				offload = 0;
 	u8				ipcso, ipcss, tucso, tucss;
 
 	ipcss = ipcso = tucss = tucso = 0;
 	hdr_len = ip_off + (ip->ip_hl << 2);
 	cur = txr->next_avail_desc;
 
 	/* Setup of IP header checksum. */
 	if (mp->m_pkthdr.csum_flags & CSUM_IP) {
 		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
 		offload |= CSUM_IP;
 		ipcss = ip_off;
 		ipcso = ip_off + offsetof(struct ip, ip_sum);
 		/*
 		 * Start offset for header checksum calculation.
 		 * End offset for header checksum calculation.
 		 * Offset of place to put the checksum.
 		 */
 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
 		TXD->lower_setup.ip_fields.ipcss = ipcss;
 		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
 		TXD->lower_setup.ip_fields.ipcso = ipcso;
 		cmd |= E1000_TXD_CMD_IP;
 	}
 
 	if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
  		offload |= CSUM_TCP;
  		tucss = hdr_len;
  		tucso = hdr_len + offsetof(struct tcphdr, th_sum);
+		/*
+		 * The 82574L can only remember the *last* context used
+		 * regardless of queue that it was use for.  We cannot reuse
+		 * contexts on this hardware platform and must generate a new
+		 * context every time.  82574L hardware spec, section 7.2.6,
+		 * second note.
+		 */
+		if (adapter->num_queues < 2) {
+ 			/*
+ 		 	* Setting up new checksum offload context for every
+			* frames takes a lot of processing time for hardware.
+			* This also reduces performance a lot for small sized
+			* frames so avoid it if driver can use previously
+			* configured checksum offload context.
+ 		 	*/
+ 			if (txr->last_hw_offload == offload) {
+ 				if (offload & CSUM_IP) {
+ 					if (txr->last_hw_ipcss == ipcss &&
+ 				    	txr->last_hw_ipcso == ipcso &&
+ 				    	txr->last_hw_tucss == tucss &&
+ 				    	txr->last_hw_tucso == tucso)
+ 						return;
+ 				} else {
+ 					if (txr->last_hw_tucss == tucss &&
+ 				    	txr->last_hw_tucso == tucso)
+ 						return;
+ 				}
+  			}
+ 			txr->last_hw_offload = offload;
+ 			txr->last_hw_tucss = tucss;
+ 			txr->last_hw_tucso = tucso;
+		}
  		/*
- 		 * Setting up new checksum offload context for every frames
- 		 * takes a lot of processing time for hardware. This also
- 		 * reduces performance a lot for small sized frames so avoid
- 		 * it if driver can use previously configured checksum
- 		 * offload context.
- 		 */
- 		if (txr->last_hw_offload == offload) {
- 			if (offload & CSUM_IP) {
- 				if (txr->last_hw_ipcss == ipcss &&
- 				    txr->last_hw_ipcso == ipcso &&
- 				    txr->last_hw_tucss == tucss &&
- 				    txr->last_hw_tucso == tucso)
- 					return;
- 			} else {
- 				if (txr->last_hw_tucss == tucss &&
- 				    txr->last_hw_tucso == tucso)
- 					return;
- 			}
-  		}
- 		txr->last_hw_offload = offload;
- 		txr->last_hw_tucss = tucss;
- 		txr->last_hw_tucso = tucso;
- 		/*
  		 * Start offset for payload checksum calculation.
  		 * End offset for payload checksum calculation.
  		 * Offset of place to put the checksum.
  		 */
 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
  		TXD->upper_setup.tcp_fields.tucss = hdr_len;
  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
  		TXD->upper_setup.tcp_fields.tucso = tucso;
  		cmd |= E1000_TXD_CMD_TCP;
  	} else if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
  		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
  		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
  		tucss = hdr_len;
  		tucso = hdr_len + offsetof(struct udphdr, uh_sum);
- 		/*
- 		 * Setting up new checksum offload context for every frames
- 		 * takes a lot of processing time for hardware. This also
- 		 * reduces performance a lot for small sized frames so avoid
- 		 * it if driver can use previously configured checksum
- 		 * offload context.
- 		 */
- 		if (txr->last_hw_offload == offload) {
- 			if (offload & CSUM_IP) {
- 				if (txr->last_hw_ipcss == ipcss &&
- 				    txr->last_hw_ipcso == ipcso &&
- 				    txr->last_hw_tucss == tucss &&
- 				    txr->last_hw_tucso == tucso)
- 					return;
- 			} else {
- 				if (txr->last_hw_tucss == tucss &&
- 				    txr->last_hw_tucso == tucso)
- 					return;
+		/*
+		 * The 82574L can only remember the *last* context used
+		 * regardless of the queue it was used for.  We cannot reuse
+		 * contexts on this hardware platform and must generate a new
+		 * context every time.  82574L hardware spec, section 7.2.6,
+		 * second note.
+		 */
+		if (adapter->num_queues < 2) {
+ 			/*
+ 		 	* Setting up new checksum offload context for every
+			* frame takes a lot of processing time for hardware.
+			* This also reduces performance a lot for small sized
+			* frames so avoid it if driver can use previously
+			* configured checksum offload context.
+ 		 	*/
+ 			if (txr->last_hw_offload == offload) {
+ 				if (offload & CSUM_IP) {
+ 					if (txr->last_hw_ipcss == ipcss &&
+ 				    	txr->last_hw_ipcso == ipcso &&
+ 				    	txr->last_hw_tucss == tucss &&
+ 				    	txr->last_hw_tucso == tucso)
+ 						return;
+ 				} else {
+ 					if (txr->last_hw_tucss == tucss &&
+ 				    	txr->last_hw_tucso == tucso)
+ 						return;
+ 				}
  			}
- 		}
- 		txr->last_hw_offload = offload;
- 		txr->last_hw_tucss = tucss;
- 		txr->last_hw_tucso = tucso;
+ 			txr->last_hw_offload = offload;
+ 			txr->last_hw_tucss = tucss;
+ 			txr->last_hw_tucso = tucso;
+		}
  		/*
  		 * Start offset for header checksum calculation.
  		 * End offset for header checksum calculation.
  		 * Offset of place to put the checksum.
  		 */
 		TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
  		TXD->upper_setup.tcp_fields.tucss = tucss;
  		TXD->upper_setup.tcp_fields.tucse = htole16(0);
  		TXD->upper_setup.tcp_fields.tucso = tucso;
   	}
   
  	if (offload & CSUM_IP) {
  		txr->last_hw_ipcss = ipcss;
  		txr->last_hw_ipcso = ipcso;
   	}
 
 	TXD->tcp_seg_setup.data = htole32(0);
 	TXD->cmd_and_length =
 	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
 	tx_buffer = &txr->tx_buffers[cur];
 	tx_buffer->m_head = NULL;
 	tx_buffer->next_eop = -1;
 
 	if (++cur == adapter->num_tx_desc)
 		cur = 0;
 
 	txr->tx_avail--;
 	txr->next_avail_desc = cur;
 }
 
 
 /**********************************************************************
  *
  *  Setup work for hardware segmentation offload (TSO)
  *
  **********************************************************************/
 static void
 em_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ip_off,
     struct ip *ip, struct tcphdr *tp, u32 *txd_upper, u32 *txd_lower)
 {
 	struct adapter			*adapter = txr->adapter;
 	struct e1000_context_desc	*TXD;
-	struct em_buffer		*tx_buffer;
+	struct em_txbuffer		*tx_buffer;
 	int cur, hdr_len;
 
 	/*
 	 * In theory we can use the same TSO context if and only if
 	 * frame is the same type(IP/TCP) and the same MSS. However
 	 * checking whether a frame has the same IP/TCP structure is
 	 * hard thing so just ignore that and always restablish a
 	 * new TSO context.
 	 */
 	hdr_len = ip_off + (ip->ip_hl << 2) + (tp->th_off << 2);
 	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
 		      E1000_TXD_DTYP_D |	/* Data descr type */
 		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */
 
 	/* IP and/or TCP header checksum calculation and insertion. */
 	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;
 
 	cur = txr->next_avail_desc;
 	tx_buffer = &txr->tx_buffers[cur];
 	TXD = (struct e1000_context_desc *) &txr->tx_base[cur];
 
 	/*
 	 * Start offset for header checksum calculation.
 	 * End offset for header checksum calculation.
 	 * Offset of place put the checksum.
 	 */
 	TXD->lower_setup.ip_fields.ipcss = ip_off;
 	TXD->lower_setup.ip_fields.ipcse =
 	    htole16(ip_off + (ip->ip_hl << 2) - 1);
 	TXD->lower_setup.ip_fields.ipcso = ip_off + offsetof(struct ip, ip_sum);
 	/*
 	 * Start offset for payload checksum calculation.
 	 * End offset for payload checksum calculation.
 	 * Offset of place to put the checksum.
 	 */
 	TXD->upper_setup.tcp_fields.tucss = ip_off + (ip->ip_hl << 2);
 	TXD->upper_setup.tcp_fields.tucse = 0;
 	TXD->upper_setup.tcp_fields.tucso =
 	    ip_off + (ip->ip_hl << 2) + offsetof(struct tcphdr, th_sum);
 	/*
 	 * Payload size per packet w/o any headers.
 	 * Length of all headers up to payload.
 	 */
 	TXD->tcp_seg_setup.fields.mss = htole16(mp->m_pkthdr.tso_segsz);
 	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;
 
 	TXD->cmd_and_length = htole32(adapter->txd_cmd |
 				E1000_TXD_CMD_DEXT |	/* Extended descr */
 				E1000_TXD_CMD_TSE |	/* TSE context */
 				E1000_TXD_CMD_IP |	/* Do IP csum */
 				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
 				(mp->m_pkthdr.len - (hdr_len))); /* Total len */
 
 	tx_buffer->m_head = NULL;
 	tx_buffer->next_eop = -1;
 
 	if (++cur == adapter->num_tx_desc)
 		cur = 0;
 
 	txr->tx_avail--;
 	txr->next_avail_desc = cur;
 	txr->tx_tso = TRUE;
 }
 
 
 /**********************************************************************
  *
  *  Examine each tx_buffer in the used queue. If the hardware is done
  *  processing the packet then free associated resources. The
  *  tx_buffer is put back on the free queue.
  *
  **********************************************************************/
 static void
 em_txeof(struct tx_ring *txr)
 {
 	struct adapter	*adapter = txr->adapter;
         int first, last, done, processed;
-        struct em_buffer *tx_buffer;
+        struct em_txbuffer *tx_buffer;
         struct e1000_tx_desc   *tx_desc, *eop_desc;
 	if_t ifp = adapter->ifp;
 
 	EM_TX_LOCK_ASSERT(txr);
 #ifdef DEV_NETMAP
 	if (netmap_tx_irq(ifp, txr->me))
 		return;
 #endif /* DEV_NETMAP */
 
 	/* No work, make sure hang detection is disabled */
         if (txr->tx_avail == adapter->num_tx_desc) {
 		txr->busy = EM_TX_IDLE;
                 return;
 	}
 
 	processed = 0;
         first = txr->next_to_clean;
         tx_desc = &txr->tx_base[first];
         tx_buffer = &txr->tx_buffers[first];
 	last = tx_buffer->next_eop;
         eop_desc = &txr->tx_base[last];
 
 	/*
 	 * What this does is get the index of the
 	 * first descriptor AFTER the EOP of the 
 	 * first packet, that way we can do the
 	 * simple comparison on the inner while loop.
 	 */
 	if (++last == adapter->num_tx_desc)
  		last = 0;
 	done = last;
 
         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
             BUS_DMASYNC_POSTREAD);
 
         while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
 		/* We clean the range of the packet */
 		while (first != done) {
                 	tx_desc->upper.data = 0;
                 	tx_desc->lower.data = 0;
                 	tx_desc->buffer_addr = 0;
                 	++txr->tx_avail;
 			++processed;
 
 			if (tx_buffer->m_head) {
 				bus_dmamap_sync(txr->txtag,
 				    tx_buffer->map,
 				    BUS_DMASYNC_POSTWRITE);
 				bus_dmamap_unload(txr->txtag,
 				    tx_buffer->map);
                         	m_freem(tx_buffer->m_head);
                         	tx_buffer->m_head = NULL;
                 	}
 			tx_buffer->next_eop = -1;
 
 	                if (++first == adapter->num_tx_desc)
 				first = 0;
 
 	                tx_buffer = &txr->tx_buffers[first];
 			tx_desc = &txr->tx_base[first];
 		}
 		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		/* See if we can continue to the next packet */
 		last = tx_buffer->next_eop;
 		if (last != -1) {
         		eop_desc = &txr->tx_base[last];
 			/* Get new done point */
 			if (++last == adapter->num_tx_desc) last = 0;
 			done = last;
 		} else
 			break;
         }
         bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
             BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
         txr->next_to_clean = first;
 
 	/*
 	** Hang detection: we know there's work outstanding
 	** or the entry return would have been taken, so no
 	** descriptor processed here indicates a potential hang.
 	** The local timer will examine this and do a reset if needed.
 	*/
 	if (processed == 0) {
 		if (txr->busy != EM_TX_HUNG)
 			++txr->busy;
 	} else /* At least one descriptor was cleaned */
 		txr->busy = EM_TX_BUSY; /* note this clears HUNG */
 
         /*
          * If we have a minimum free, clear IFF_DRV_OACTIVE
          * to tell the stack that it is OK to send packets.
 	 * Notice that all writes of OACTIVE happen under the
 	 * TX lock which, with a single queue, guarantees 
 	 * sanity.
          */
         if (txr->tx_avail >= EM_MAX_SCATTER) {
 		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
 	}
 
 	/* Disable hang detection if all clean */
 	if (txr->tx_avail == adapter->num_tx_desc)
 		txr->busy = EM_TX_IDLE;
 }
 
-
 /*********************************************************************
  *
  *  Refresh RX descriptor mbufs from system mbuf buffer pool.
  *
  **********************************************************************/
 static void
 em_refresh_mbufs(struct rx_ring *rxr, int limit)
 {
 	struct adapter		*adapter = rxr->adapter;
 	struct mbuf		*m;
-	bus_dma_segment_t	segs[1];
-	struct em_buffer	*rxbuf;
+	bus_dma_segment_t	segs;
+	struct em_rxbuffer	*rxbuf;
 	int			i, j, error, nsegs;
 	bool			cleaned = FALSE;
 
 	i = j = rxr->next_to_refresh;
 	/*
 	** Get one descriptor beyond
 	** our work mark to control
 	** the loop.
 	*/
 	if (++j == adapter->num_rx_desc)
 		j = 0;
 
 	while (j != limit) {
 		rxbuf = &rxr->rx_buffers[i];
 		if (rxbuf->m_head == NULL) {
 			m = m_getjcl(M_NOWAIT, MT_DATA,
 			    M_PKTHDR, adapter->rx_mbuf_sz);
 			/*
 			** If we have a temporary resource shortage
 			** that causes a failure, just abort refresh
 			** for now, we will return to this point when
 			** reinvoked from em_rxeof.
 			*/
 			if (m == NULL)
 				goto update;
 		} else
 			m = rxbuf->m_head;
 
 		m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz;
 		m->m_flags |= M_PKTHDR;
 		m->m_data = m->m_ext.ext_buf;
 
 		/* Use bus_dma machinery to setup the memory mapping  */
 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map,
-		    m, segs, &nsegs, BUS_DMA_NOWAIT);
+		    m, &segs, &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) {
 			printf("Refresh mbufs: hdr dmamap load"
 			    " failure - %d\n", error);
 			m_free(m);
 			rxbuf->m_head = NULL;
 			goto update;
 		}
 		rxbuf->m_head = m;
+		rxbuf->paddr = segs.ds_addr;
 		bus_dmamap_sync(rxr->rxtag,
 		    rxbuf->map, BUS_DMASYNC_PREREAD);
-		rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr);
+		em_setup_rxdesc(&rxr->rx_base[i], rxbuf);
 		cleaned = TRUE;
 
 		i = j; /* Next is precalulated for us */
 		rxr->next_to_refresh = i;
 		/* Calculate next controlling index */
 		if (++j == adapter->num_rx_desc)
 			j = 0;
 	}
 update:
 	/*
 	** Update the tail pointer only if,
 	** and as far as we have refreshed.
 	*/
 	if (cleaned)
 		E1000_WRITE_REG(&adapter->hw,
 		    E1000_RDT(rxr->me), rxr->next_to_refresh);
 
 	return;
 }
 
 
 /*********************************************************************
  *
  *  Allocate memory for rx_buffer structures. Since we use one
  *  rx_buffer per received packet, the maximum number of rx_buffer's
  *  that we'll need is equal to the number of receive descriptors
  *  that we've allocated.
  *
  **********************************************************************/
 static int
 em_allocate_receive_buffers(struct rx_ring *rxr)
 {
 	struct adapter		*adapter = rxr->adapter;
 	device_t		dev = adapter->dev;
-	struct em_buffer	*rxbuf;
+	struct em_rxbuffer	*rxbuf;
 	int			error;
 
-	rxr->rx_buffers = malloc(sizeof(struct em_buffer) *
+	rxr->rx_buffers = malloc(sizeof(struct em_rxbuffer) *
 	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (rxr->rx_buffers == NULL) {
 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
 		return (ENOMEM);
 	}
 
 	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
 				1, 0,			/* alignment, bounds */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR,	/* highaddr */
 				NULL, NULL,		/* filter, filterarg */
 				MJUM9BYTES,		/* maxsize */
 				1,			/* nsegments */
 				MJUM9BYTES,		/* maxsegsize */
 				0,			/* flags */
 				NULL,			/* lockfunc */
 				NULL,			/* lockarg */
 				&rxr->rxtag);
 	if (error) {
 		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
 		    __func__, error);
 		goto fail;
 	}
 
 	rxbuf = rxr->rx_buffers;
 	for (int i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
 		rxbuf = &rxr->rx_buffers[i];
 		error = bus_dmamap_create(rxr->rxtag, 0, &rxbuf->map);
 		if (error) {
 			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
 			    __func__, error);
 			goto fail;
 		}
 	}
 
 	return (0);
 
 fail:
 	em_free_receive_structures(adapter);
 	return (error);
 }
 
 
 /*********************************************************************
  *
  *  Initialize a receive ring and its buffers.
  *
  **********************************************************************/
 static int
 em_setup_receive_ring(struct rx_ring *rxr)
 {
 	struct	adapter 	*adapter = rxr->adapter;
-	struct em_buffer	*rxbuf;
+	struct em_rxbuffer	*rxbuf;
 	bus_dma_segment_t	seg[1];
 	int			rsize, nsegs, error = 0;
 #ifdef DEV_NETMAP
 	struct netmap_slot *slot;
 	struct netmap_adapter *na = netmap_getna(adapter->ifp);
 #endif
 
 
 	/* Clear the ring contents */
 	EM_RX_LOCK(rxr);
 	rsize = roundup2(adapter->num_rx_desc *
-	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
+	    sizeof(union e1000_rx_desc_extended), EM_DBA_ALIGN);
 	bzero((void *)rxr->rx_base, rsize);
 #ifdef DEV_NETMAP
 	slot = netmap_reset(na, NR_RX, rxr->me, 0);
 #endif
 
 	/*
 	** Free current RX buffer structs and their mbufs
 	*/
 	for (int i = 0; i < adapter->num_rx_desc; i++) {
 		rxbuf = &rxr->rx_buffers[i];
 		if (rxbuf->m_head != NULL) {
 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
 			    BUS_DMASYNC_POSTREAD);
 			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
 			m_freem(rxbuf->m_head);
 			rxbuf->m_head = NULL; /* mark as freed */
 		}
 	}
 
 	/* Now replenish the mbufs */
         for (int j = 0; j != adapter->num_rx_desc; ++j) {
 		rxbuf = &rxr->rx_buffers[j];
 #ifdef DEV_NETMAP
 		if (slot) {
 			int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
 			uint64_t paddr;
 			void *addr;
 
 			addr = PNMB(na, slot + si, &paddr);
 			netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
-			/* Update descriptor */
-			rxr->rx_base[j].buffer_addr = htole64(paddr);
+			/*
+			 * em_setup_rxdesc() programs the descriptor from
+			 * rxbuf->paddr, so record the netmap buffer's
+			 * address (returned by PNMB above) before calling it.
+			 */
+			rxbuf->paddr = paddr;
+			em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
 			continue;
 		}
 #endif /* DEV_NETMAP */
 		rxbuf->m_head = m_getjcl(M_NOWAIT, MT_DATA,
 		    M_PKTHDR, adapter->rx_mbuf_sz);
 		if (rxbuf->m_head == NULL) {
 			error = ENOBUFS;
 			goto fail;
 		}
 		rxbuf->m_head->m_len = adapter->rx_mbuf_sz;
 		rxbuf->m_head->m_flags &= ~M_HASFCS; /* we strip it */
 		rxbuf->m_head->m_pkthdr.len = adapter->rx_mbuf_sz;
 
 		/* Get the memory mapping */
 		error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
 		    rxbuf->map, rxbuf->m_head, seg,
 		    &nsegs, BUS_DMA_NOWAIT);
 		if (error != 0) {
 			m_freem(rxbuf->m_head);
 			rxbuf->m_head = NULL;
 			goto fail;
 		}
 		bus_dmamap_sync(rxr->rxtag,
 		    rxbuf->map, BUS_DMASYNC_PREREAD);
 
-		/* Update descriptor */
-		rxr->rx_base[j].buffer_addr = htole64(seg[0].ds_addr);
+		rxbuf->paddr = seg[0].ds_addr;
+		em_setup_rxdesc(&rxr->rx_base[j], rxbuf);
 	}
 	rxr->next_to_check = 0;
 	rxr->next_to_refresh = 0;
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 fail:
 	EM_RX_UNLOCK(rxr);
 	return (error);
 }
 
 /*********************************************************************
  *
  *  Initialize all receive rings.
  *
  **********************************************************************/
 static int
 em_setup_receive_structures(struct adapter *adapter)
 {
 	struct rx_ring *rxr = adapter->rx_rings;
 	int q;
 
 	for (q = 0; q < adapter->num_queues; q++, rxr++)
 		if (em_setup_receive_ring(rxr))
 			goto fail;
 
 	return (0);
 fail:
 	/*
 	 * Free RX buffers allocated so far, we will only handle
 	 * the rings that completed, the failing case will have
 	 * cleaned up for itself. 'q' failed, so its the terminus.
 	 */
 	for (int i = 0; i < q; ++i) {
 		rxr = &adapter->rx_rings[i];
 		for (int n = 0; n < adapter->num_rx_desc; n++) {
-			struct em_buffer *rxbuf;
+			struct em_rxbuffer *rxbuf;
 			rxbuf = &rxr->rx_buffers[n];
 			if (rxbuf->m_head != NULL) {
 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
 			  	  BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
 				m_freem(rxbuf->m_head);
 				rxbuf->m_head = NULL;
 			}
 		}
 		rxr->next_to_check = 0;
 		rxr->next_to_refresh = 0;
 	}
 
 	return (ENOBUFS);
 }
 
 /*********************************************************************
  *
  *  Free all receive rings.
  *
  **********************************************************************/
 static void
 em_free_receive_structures(struct adapter *adapter)
 {
 	struct rx_ring *rxr = adapter->rx_rings;
 
 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
 		em_free_receive_buffers(rxr);
 		/* Free the ring memory as well */
 		em_dma_free(adapter, &rxr->rxdma);
 		EM_RX_LOCK_DESTROY(rxr);
 	}
 
 	free(adapter->rx_rings, M_DEVBUF);
 }
 
 
 /*********************************************************************
  *
  *  Free receive ring data structures
  *
  **********************************************************************/
 static void
 em_free_receive_buffers(struct rx_ring *rxr)
 {
 	struct adapter		*adapter = rxr->adapter;
-	struct em_buffer	*rxbuf = NULL;
+	struct em_rxbuffer	*rxbuf = NULL;
 
 	INIT_DEBUGOUT("free_receive_buffers: begin");
 
 	if (rxr->rx_buffers != NULL) {
 		for (int i = 0; i < adapter->num_rx_desc; i++) {
 			rxbuf = &rxr->rx_buffers[i];
 			if (rxbuf->map != NULL) {
 				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
 				    BUS_DMASYNC_POSTREAD);
 				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
 				bus_dmamap_destroy(rxr->rxtag, rxbuf->map);
 			}
 			if (rxbuf->m_head != NULL) {
 				m_freem(rxbuf->m_head);
 				rxbuf->m_head = NULL;
 			}
 		}
 		free(rxr->rx_buffers, M_DEVBUF);
 		rxr->rx_buffers = NULL;
 		rxr->next_to_check = 0;
 		rxr->next_to_refresh = 0;
 	}
 
 	if (rxr->rxtag != NULL) {
 		bus_dma_tag_destroy(rxr->rxtag);
 		rxr->rxtag = NULL;
 	}
 
 	return;
 }
 
 
 /*********************************************************************
  *
  *  Enable receive unit.
  *
  **********************************************************************/
 
 static void
 em_initialize_receive_unit(struct adapter *adapter)
 {
-	struct rx_ring	*rxr = adapter->rx_rings;
+	struct rx_ring *rxr = adapter->rx_rings;
 	if_t ifp = adapter->ifp;
 	struct e1000_hw	*hw = &adapter->hw;
-	u64	bus_addr;
-	u32	rctl, rxcsum;
+	u32	rctl, rxcsum, rfctl;
 
 	INIT_DEBUGOUT("em_initialize_receive_units: begin");
 
 	/*
 	 * Make sure receives are disabled while setting
 	 * up the descriptor ring
 	 */
 	rctl = E1000_READ_REG(hw, E1000_RCTL);
 	/* Do not disable if ever enabled on this hardware */
 	if ((hw->mac.type != e1000_82574) && (hw->mac.type != e1000_82583))
 		E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
 
+	/* Setup the Receive Control Register */
+	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
+	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
+	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+	/* Do not store bad packets */
+	rctl &= ~E1000_RCTL_SBP;
+
+	/* Enable Long Packet receive */
+	if (if_getmtu(ifp) > ETHERMTU)
+		rctl |= E1000_RCTL_LPE;
+	else
+		rctl &= ~E1000_RCTL_LPE;
+
+        /* Strip the CRC */
+        if (!em_disable_crc_stripping)
+		rctl |= E1000_RCTL_SECRC;
+
 	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
 	    adapter->rx_abs_int_delay.value);
 
 	E1000_WRITE_REG(&adapter->hw, E1000_RDTR,
 	    adapter->rx_int_delay.value);
 	/*
 	 * Set the interrupt throttling rate. Value is calculated
 	 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
 	 */
 	E1000_WRITE_REG(hw, E1000_ITR, DEFAULT_ITR);
 
+	/* Use extended rx descriptor formats */
+	rfctl = E1000_READ_REG(hw, E1000_RFCTL);
+	rfctl |= E1000_RFCTL_EXTEN;
 	/*
 	** When using MSIX interrupts we need to throttle
 	** using the EITR register (82574 only)
 	*/
 	if (hw->mac.type == e1000_82574) {
-		u32 rfctl;
 		for (int i = 0; i < 4; i++)
 			E1000_WRITE_REG(hw, E1000_EITR_82574(i),
 			    DEFAULT_ITR);
 		/* Disable accelerated acknowledge */
-		rfctl = E1000_READ_REG(hw, E1000_RFCTL);
 		rfctl |= E1000_RFCTL_ACK_DIS;
-		E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
 	}
+	E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
 
 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
 	if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
 #ifdef EM_MULTIQUEUE
 		rxcsum |= E1000_RXCSUM_TUOFL |
 			  E1000_RXCSUM_IPOFL |
 			  E1000_RXCSUM_PCSD;
 #else
 		rxcsum |= E1000_RXCSUM_TUOFL;
 #endif
 	} else
 		rxcsum &= ~E1000_RXCSUM_TUOFL;
 
 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
 
 #ifdef EM_MULTIQUEUE
+#define RSSKEYLEN 10
 	if (adapter->num_queues > 1) {
-		uint32_t rss_key[10];
-		uint32_t reta;
+		uint8_t  rss_key[4 * RSSKEYLEN];
+		uint32_t reta = 0;
 		int i;
 
 		/*
 		* Configure RSS key
 		*/
 		arc4rand(rss_key, sizeof(rss_key), 0);
-		for (i = 0; i < 10; ++i)
-			E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]);
+		for (i = 0; i < RSSKEYLEN; ++i) {
+			uint32_t rssrk = 0;
 
+			rssrk = EM_RSSRK_VAL(rss_key, i);
+			E1000_WRITE_REG(hw,E1000_RSSRK(i), rssrk);
+		}
+
 		/*
 		* Configure RSS redirect table in following fashion:
 		* (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)]
 		*/
-		reta = 0;
-		for (i = 0; i < 4; ++i) {
+		for (i = 0; i < sizeof(reta); ++i) {
 			uint32_t q;
+
 			q = (i % adapter->num_queues) << 7;
 			reta |= q << (8 * i);
 		}
-		for (i = 0; i < 32; ++i)
+
+		for (i = 0; i < 32; ++i) {
 			E1000_WRITE_REG(hw, E1000_RETA(i), reta);
+		}
 
 		E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | 
 				E1000_MRQC_RSS_FIELD_IPV4_TCP |
 				E1000_MRQC_RSS_FIELD_IPV4 |
 				E1000_MRQC_RSS_FIELD_IPV6_TCP_EX |
 				E1000_MRQC_RSS_FIELD_IPV6_EX |
-				E1000_MRQC_RSS_FIELD_IPV6 |
-				E1000_MRQC_RSS_FIELD_IPV6_TCP);
+				E1000_MRQC_RSS_FIELD_IPV6);
 	}
 #endif
 	/*
 	** XXX TEMPORARY WORKAROUND: on some systems with 82573
 	** long latencies are observed, like Lenovo X60. This
 	** change eliminates the problem, but since having positive
 	** values in RDTR is a known source of problems on other
 	** platforms another solution is being sought.
 	*/
 	if (hw->mac.type == e1000_82573)
 		E1000_WRITE_REG(hw, E1000_RDTR, 0x20);
 
 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
 		/* Setup the Base and Length of the Rx Descriptor Ring */
+		u64 bus_addr = rxr->rxdma.dma_paddr;
 		u32 rdt = adapter->num_rx_desc - 1; /* default */
 
-		bus_addr = rxr->rxdma.dma_paddr;
 		E1000_WRITE_REG(hw, E1000_RDLEN(i),
-		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
+		    adapter->num_rx_desc * sizeof(union e1000_rx_desc_extended));
 		E1000_WRITE_REG(hw, E1000_RDBAH(i), (u32)(bus_addr >> 32));
 		E1000_WRITE_REG(hw, E1000_RDBAL(i), (u32)bus_addr);
 		/* Setup the Head and Tail Descriptor Pointers */
 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
 #ifdef DEV_NETMAP
 		/*
 		 * an init() while a netmap client is active must
 		 * preserve the rx buffers passed to userspace.
 		 */
 		if (if_getcapenable(ifp) & IFCAP_NETMAP) {
 			struct netmap_adapter *na = netmap_getna(adapter->ifp);
 			rdt -= nm_kr_rxspace(&na->rx_rings[i]);
 		}
 #endif /* DEV_NETMAP */
 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
 	}
 
 	/*
 	 * Set PTHRESH for improved jumbo performance
 	 * According to 10.2.5.11 of Intel 82574 Datasheet,
 	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
 	 * Only write to RXDCTL(1) if there is a need for different
 	 * settings.
 	 */
 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
 	    (adapter->hw.mac.type == e1000_pch2lan) ||
 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
 	    (if_getmtu(ifp) > ETHERMTU)) {
 		u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0));
 		E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3);
-	} else if ((adapter->hw.mac.type == e1000_82574) &&
-		  (if_getmtu(ifp) > ETHERMTU)) {
+	} else if (adapter->hw.mac.type == e1000_82574) {
 		for (int i = 0; i < adapter->num_queues; i++) {
 			u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
 
-                	rxdctl |= 0x20; /* PTHRESH */
-                	rxdctl |= 4 << 8; /* HTHRESH */
-                	rxdctl |= 4 << 16;/* WTHRESH */
+			rxdctl |= 0x20; /* PTHRESH */
+			rxdctl |= 4 << 8; /* HTHRESH */
+			rxdctl |= 4 << 16;/* WTHRESH */
 			rxdctl |= 1 << 24; /* Switch to granularity */
 			E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
 		}
 	}
 		
 	if (adapter->hw.mac.type >= e1000_pch2lan) {
 		if (if_getmtu(ifp) > ETHERMTU)
 			e1000_lv_jumbo_workaround_ich8lan(hw, TRUE);
 		else
 			e1000_lv_jumbo_workaround_ich8lan(hw, FALSE);
 	}
 
-	/* Setup the Receive Control Register */
-	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
-	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
-	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
-	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
-
-        /* Strip the CRC */
-        if (!em_disable_crc_stripping)
-		rctl |= E1000_RCTL_SECRC;
-
         /* Make sure VLAN Filters are off */
         rctl &= ~E1000_RCTL_VFE;
-	rctl &= ~E1000_RCTL_SBP;
 
 	if (adapter->rx_mbuf_sz == MCLBYTES)
 		rctl |= E1000_RCTL_SZ_2048;
 	else if (adapter->rx_mbuf_sz == MJUMPAGESIZE)
 		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
 	else if (adapter->rx_mbuf_sz > MJUMPAGESIZE)
 		rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
 
-	if (if_getmtu(ifp) > ETHERMTU)
-		rctl |= E1000_RCTL_LPE;
-	else
-		rctl &= ~E1000_RCTL_LPE;
-
+	/* Clear the DTYPE bits so that a DTYPE of 00 is used here */
+	rctl &= ~0x00000C00;
 	/* Write out the settings */
 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
 
 	return;
 }
 
 
 /*********************************************************************
  *
  *  This routine executes in interrupt context. It replenishes
  *  the mbufs in the descriptor and sends data which has been
  *  dma'ed into host memory to upper layer.
  *
  *  We loop at most count times if count is > 0, or until done if
  *  count < 0.
  *  
  *  For polling we also now return the number of cleaned packets
  *********************************************************************/
 static bool
 em_rxeof(struct rx_ring *rxr, int count, int *done)
 {
 	struct adapter		*adapter = rxr->adapter;
 	if_t ifp = adapter->ifp;
 	struct mbuf		*mp, *sendmp;
-	u8			status = 0;
+	u32			status = 0;
 	u16 			len;
 	int			i, processed, rxdone = 0;
 	bool			eop;
-	struct e1000_rx_desc	*cur;
+	union e1000_rx_desc_extended	*cur;
 
 	EM_RX_LOCK(rxr);
 
 	/* Sync the ring */
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 
 #ifdef DEV_NETMAP
 	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
 		EM_RX_UNLOCK(rxr);
 		return (FALSE);
 	}
 #endif /* DEV_NETMAP */
 
 	for (i = rxr->next_to_check, processed = 0; count != 0;) {
-
 		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
 			break;
 
 		cur = &rxr->rx_base[i];
-		status = cur->status;
+		status = le32toh(cur->wb.upper.status_error);
 		mp = sendmp = NULL;
 
 		if ((status & E1000_RXD_STAT_DD) == 0)
 			break;
 
-		len = le16toh(cur->length);
+		len = le16toh(cur->wb.upper.length);
 		eop = (status & E1000_RXD_STAT_EOP) != 0;
 
-		if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) ||
+		if ((status & E1000_RXDEXT_ERR_FRAME_ERR_MASK) ||
 		    (rxr->discard == TRUE)) {
 			adapter->dropped_pkts++;
 			++rxr->rx_discarded;
 			if (!eop) /* Catch subsequent segs */
 				rxr->discard = TRUE;
 			else
 				rxr->discard = FALSE;
 			em_rx_discard(rxr, i);
 			goto next_desc;
 		}
 		bus_dmamap_unload(rxr->rxtag, rxr->rx_buffers[i].map);
 
 		/* Assign correct length to the current fragment */
 		mp = rxr->rx_buffers[i].m_head;
 		mp->m_len = len;
 
 		/* Trigger for refresh */
 		rxr->rx_buffers[i].m_head = NULL;
 
 		/* First segment? */
 		if (rxr->fmp == NULL) {
 			mp->m_pkthdr.len = len;
 			rxr->fmp = rxr->lmp = mp;
 		} else {
 			/* Chain mbuf's together */
 			mp->m_flags &= ~M_PKTHDR;
 			rxr->lmp->m_next = mp;
 			rxr->lmp = mp;
 			rxr->fmp->m_pkthdr.len += len;
 		}
 
 		if (eop) {
 			--count;
 			sendmp = rxr->fmp;
 			if_setrcvif(sendmp, ifp);
 			if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
-			em_receive_checksum(cur, sendmp);
+			em_receive_checksum(status, sendmp);
 #ifndef __NO_STRICT_ALIGNMENT
 			if (adapter->hw.mac.max_frame_size >
 			    (MCLBYTES - ETHER_ALIGN) &&
 			    em_fixup_rx(rxr) != 0)
 				goto skip;
 #endif
 			if (status & E1000_RXD_STAT_VP) {
 				if_setvtag(sendmp, 
-				    le16toh(cur->special));
+				    le16toh(cur->wb.upper.vlan));
 				sendmp->m_flags |= M_VLANTAG;
 			}
 #ifndef __NO_STRICT_ALIGNMENT
 skip:
 #endif
 			rxr->fmp = rxr->lmp = NULL;
 		}
 next_desc:
 		/* Sync the ring */
 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 	    		BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 		/* Zero out the receive descriptors status. */
-		cur->status = 0;
+		cur->wb.upper.status_error &= htole32(~0xFF);
 		++rxdone;	/* cumulative for POLL */
 		++processed;
 
 		/* Advance our pointers to the next descriptor. */
 		if (++i == adapter->num_rx_desc)
 			i = 0;
 
 		/* Send to the stack */
 		if (sendmp != NULL) {
 			rxr->next_to_check = i;
 			EM_RX_UNLOCK(rxr);
 			if_input(ifp, sendmp);
 			EM_RX_LOCK(rxr);
 			i = rxr->next_to_check;
 		}
 
 		/* Only refresh mbufs every 8 descriptors */
 		if (processed == 8) {
 			em_refresh_mbufs(rxr, i);
 			processed = 0;
 		}
 	}
 
 	/* Catch any remaining refresh work */
 	if (e1000_rx_unrefreshed(rxr))
 		em_refresh_mbufs(rxr, i);
 
 	rxr->next_to_check = i;
 	if (done != NULL)
 		*done = rxdone;
 	EM_RX_UNLOCK(rxr);
 
 	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
 }
 
 static __inline void
 em_rx_discard(struct rx_ring *rxr, int i)
 {
-	struct em_buffer	*rbuf;
+	struct em_rxbuffer	*rbuf;
 
 	rbuf = &rxr->rx_buffers[i];
 	bus_dmamap_unload(rxr->rxtag, rbuf->map);
 
 	/* Free any previous pieces */
 	if (rxr->fmp != NULL) {
 		rxr->fmp->m_flags |= M_PKTHDR;
 		m_freem(rxr->fmp);
 		rxr->fmp = NULL;
 		rxr->lmp = NULL;
 	}
 	/*
 	** Free buffer and allow em_refresh_mbufs()
 	** to clean up and recharge buffer.
 	*/
 	if (rbuf->m_head) {
 		m_free(rbuf->m_head);
 		rbuf->m_head = NULL;
 	}
 	return;
 }
 
 #ifndef __NO_STRICT_ALIGNMENT
 /*
  * When jumbo frames are enabled we should realign entire payload on
  * architecures with strict alignment. This is serious design mistake of 8254x
  * as it nullifies DMA operations. 8254x just allows RX buffer size to be
  * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
  * payload. On architecures without strict alignment restrictions 8254x still
  * performs unaligned memory access which would reduce the performance too.
  * To avoid copying over an entire frame to align, we allocate a new mbuf and
  * copy ethernet header to the new mbuf. The new mbuf is prepended into the
  * existing mbuf chain.
  *
  * Be aware, best performance of the 8254x is achived only when jumbo frame is
  * not used at all on architectures with strict alignment.
  */
 static int
 em_fixup_rx(struct rx_ring *rxr)
 {
 	struct adapter *adapter = rxr->adapter;
 	struct mbuf *m, *n;
 	int error;
 
 	error = 0;
 	m = rxr->fmp;
 	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
 		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
 		m->m_data += ETHER_HDR_LEN;
 	} else {
 		MGETHDR(n, M_NOWAIT, MT_DATA);
 		if (n != NULL) {
 			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
 			m->m_data += ETHER_HDR_LEN;
 			m->m_len -= ETHER_HDR_LEN;
 			n->m_len = ETHER_HDR_LEN;
 			M_MOVE_PKTHDR(n, m);
 			n->m_next = m;
 			rxr->fmp = n;
 		} else {
 			adapter->dropped_pkts++;
 			m_freem(rxr->fmp);
 			rxr->fmp = NULL;
 			error = ENOMEM;
 		}
 	}
 
 	return (error);
 }
 #endif
 
+static void
+em_setup_rxdesc(union e1000_rx_desc_extended *rxd,
+    const struct em_rxbuffer *rxbuf)
+{
+	rxd->read.buffer_addr = htole64(rxbuf->paddr);
+	rxd->wb.upper.status_error = 0;	/* DD bits must be cleared */
+}
+
 /*********************************************************************
  *
  *  Verify that the hardware indicated that the checksum is valid.
  *  Inform the stack about the status of checksum so that stack
  *  doesn't spend time verifying the checksum.
  *
  *********************************************************************/
 static void
-em_receive_checksum(struct e1000_rx_desc *rx_desc, struct mbuf *mp)
+em_receive_checksum(uint32_t status, struct mbuf *mp)
 {
 	mp->m_pkthdr.csum_flags = 0;
 
 	/* Ignore Checksum bit is set */
-	if (rx_desc->status & E1000_RXD_STAT_IXSM)
+	if (status & E1000_RXD_STAT_IXSM)
 		return;
 
-	if (rx_desc->errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE))
-		return;
-
-	/* IP Checksum Good? */
-	if (rx_desc->status & E1000_RXD_STAT_IPCS)
+	/* If the IP checksum exists and there is no IP Checksum error */
+	if ((status & (E1000_RXD_STAT_IPCS | E1000_RXDEXT_STATERR_IPE)) ==
+		E1000_RXD_STAT_IPCS) {
 		mp->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);
+	}
 
 	/* TCP or UDP checksum */
-	if (rx_desc->status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
+	if (status & E1000_RXDEXT_STATERR_TCPE) {
+		/* TCPE covers TCP and UDP; let the stack verify it. */
+		return;
+	}
+	/* Hardware computed the L4 checksum and reported no error. */
+	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
 		mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 		mp->m_pkthdr.csum_data = htons(0xffff);
 	}
 }
 
 /*
  * This routine is run via an vlan
  * config EVENT
  */
 static void
 em_register_vlan(void *arg, if_t ifp, u16 vtag)
 {
 	struct adapter	*adapter = if_getsoftc(ifp);
 	u32		index, bit;
 
 	if ((void*)adapter !=  arg)   /* Not our event */
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
                 return;
 
 	EM_CORE_LOCK(adapter);
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
 	adapter->shadow_vfta[index] |= (1 << bit);
 	++adapter->num_vlans;
 	/* Re-init to load the changes */
 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
 		em_init_locked(adapter);
 	EM_CORE_UNLOCK(adapter);
 }
 
 /*
  * This routine is run via an vlan
  * unconfig EVENT
  */
 static void
 em_unregister_vlan(void *arg, if_t ifp, u16 vtag)
 {
 	struct adapter	*adapter = if_getsoftc(ifp);
 	u32		index, bit;
 
 	if (adapter != arg)
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
                 return;
 
 	EM_CORE_LOCK(adapter);
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
 	adapter->shadow_vfta[index] &= ~(1 << bit);
 	--adapter->num_vlans;
 	/* Re-init to load the changes */
 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
 		em_init_locked(adapter);
 	EM_CORE_UNLOCK(adapter);
 }
 
 static void
 em_setup_vlan_hw_support(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32             reg;
 
 	/*
 	** We get here thru init_locked, meaning
 	** a soft reset, this has already cleared
 	** the VFTA and other state, so if there
 	** have been no vlan's registered do nothing.
 	*/
 	if (adapter->num_vlans == 0)
                 return;
 
 	/*
 	** A soft reset zero's out the VFTA, so
 	** we need to repopulate it now.
 	*/
 	for (int i = 0; i < EM_VFTA_SIZE; i++)
                 if (adapter->shadow_vfta[i] != 0)
 			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
                             i, adapter->shadow_vfta[i]);
 
 	reg = E1000_READ_REG(hw, E1000_CTRL);
 	reg |= E1000_CTRL_VME;
 	E1000_WRITE_REG(hw, E1000_CTRL, reg);
 
 	/* Enable the Filter Table */
 	reg = E1000_READ_REG(hw, E1000_RCTL);
 	reg &= ~E1000_RCTL_CFIEN;
 	reg |= E1000_RCTL_VFE;
 	E1000_WRITE_REG(hw, E1000_RCTL, reg);
 }
 
 static void
 em_enable_intr(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 ims_mask = IMS_ENABLE_MASK;
 
 	if (hw->mac.type == e1000_82574) {
 		E1000_WRITE_REG(hw, EM_EIAC, adapter->ims);
 		ims_mask |= adapter->ims;
 	} 
 	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
 }
 
 static void
 em_disable_intr(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 
 	if (hw->mac.type == e1000_82574)
 		E1000_WRITE_REG(hw, EM_EIAC, 0);
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
 }
 
 /*
  * Bit of a misnomer, what this really means is
  * to enable OS management of the system... aka
  * to disable special hardware management features 
  */
 static void
 em_init_manageability(struct adapter *adapter)
 {
 	/* A shared code workaround */
 #define E1000_82542_MANC2H E1000_MANC2H
 	if (adapter->has_manage) {
 		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
 
 		/* disable hardware interception of ARP */
 		manc &= ~(E1000_MANC_ARP_EN);
 
                 /* enable receiving management packets to the host */
 		manc |= E1000_MANC_EN_MNG2HOST;
 #define E1000_MNG2HOST_PORT_623 (1 << 5)
 #define E1000_MNG2HOST_PORT_664 (1 << 6)
 		manc2h |= E1000_MNG2HOST_PORT_623;
 		manc2h |= E1000_MNG2HOST_PORT_664;
 		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
 	}
 }
 
 /*
  * Give control back to hardware management
  * controller if there is one.
  */
 static void
 em_release_manageability(struct adapter *adapter)
 {
 	if (adapter->has_manage) {
 		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
 
 		/* re-enable hardware interception of ARP */
 		manc |= E1000_MANC_ARP_EN;
 		manc &= ~E1000_MANC_EN_MNG2HOST;
 
 		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
 	}
 }
 
 /*
  * em_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
  * For ASF and Pass Through versions of f/w this means
  * that the driver is loaded. For AMT version type f/w
  * this means that the network i/f is open.
  */
 static void
 em_get_hw_control(struct adapter *adapter)
 {
 	u32 ctrl_ext, swsm;
 
 	if (adapter->hw.mac.type == e1000_82573) {
 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
 		    swsm | E1000_SWSM_DRV_LOAD);
 		return;
 	}
 	/* else */
 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
 	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
 	return;
 }
 
 /*
  * em_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
  * For ASF and Pass Through versions of f/w this means that
  * the driver is no longer loaded. For AMT versions of the
  * f/w this means that the network i/f is closed.
  */
 static void
 em_release_hw_control(struct adapter *adapter)
 {
 	u32 ctrl_ext, swsm;
 
 	if (!adapter->has_manage)
 		return;
 
 	if (adapter->hw.mac.type == e1000_82573) {
 		swsm = E1000_READ_REG(&adapter->hw, E1000_SWSM);
 		E1000_WRITE_REG(&adapter->hw, E1000_SWSM,
 		    swsm & ~E1000_SWSM_DRV_LOAD);
 		return;
 	}
 	/* else */
 	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
 	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
 	return;
 }
 
 static int
 em_is_valid_ether_addr(u8 *addr)
 {
 	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
 
 	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
 		return (FALSE);
 	}
 
 	return (TRUE);
 }
 
 /*
 ** Parse the interface capabilities with regard
 ** to both system management and wake-on-lan for
 ** later use.
 */
 static void
 em_get_wakeup(device_t dev)
 {
 	struct adapter	*adapter = device_get_softc(dev);
 	u16		eeprom_data = 0, device_id, apme_mask;
 
 	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
 	apme_mask = EM_EEPROM_APME;
 
 	switch (adapter->hw.mac.type) {
 	case e1000_82573:
 	case e1000_82583:
 		adapter->has_amt = TRUE;
 		/* Falls thru */
 	case e1000_82571:
 	case e1000_82572:
 	case e1000_80003es2lan:
 		if (adapter->hw.bus.func == 1) {
 			e1000_read_nvm(&adapter->hw,
 			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
 			break;
 		} else
 			e1000_read_nvm(&adapter->hw,
 			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
 		break;
 	case e1000_ich8lan:
 	case e1000_ich9lan:
 	case e1000_ich10lan:
 	case e1000_pchlan:
 	case e1000_pch2lan:
 		apme_mask = E1000_WUC_APME;
 		adapter->has_amt = TRUE;
 		eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC);
 		break;
 	default:
 		e1000_read_nvm(&adapter->hw,
 		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
 		break;
 	}
 	if (eeprom_data & apme_mask)
 		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
 	/*
          * We have the eeprom settings, now apply the special cases
          * where the eeprom may be wrong or the board won't support
          * wake on lan on a particular port
 	 */
 	device_id = pci_get_device(dev);
         switch (device_id) {
 	case E1000_DEV_ID_82571EB_FIBER:
 		/* Wake events only supported on port A for dual fiber
 		 * regardless of eeprom setting */
 		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
 		    E1000_STATUS_FUNC_1)
 			adapter->wol = 0;
 		break;
 	case E1000_DEV_ID_82571EB_QUAD_COPPER:
 	case E1000_DEV_ID_82571EB_QUAD_FIBER:
 	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
                 /* if quad port adapter, disable WoL on all but port A */
 		if (global_quad_port_a != 0)
 			adapter->wol = 0;
 		/* Reset for multiple quad port adapters */
 		if (++global_quad_port_a == 4)
 			global_quad_port_a = 0;
                 break;
 	}
 	return;
 }
 
 
 /*
  * Enable PCI Wake On Lan capability
  */
 static void
 em_enable_wakeup(device_t dev)
 {
 	struct adapter	*adapter = device_get_softc(dev);
 	if_t ifp = adapter->ifp;
 	u32		pmc, ctrl, ctrl_ext, rctl;
 	u16     	status;
 
 	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
 		return;
 
 	/* Advertise the wakeup capability */
 	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
 	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
 	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
 	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
 
 	if ((adapter->hw.mac.type == e1000_ich8lan) ||
 	    (adapter->hw.mac.type == e1000_pchlan) ||
 	    (adapter->hw.mac.type == e1000_ich9lan) ||
 	    (adapter->hw.mac.type == e1000_ich10lan))
 		e1000_suspend_workarounds_ich8lan(&adapter->hw);
 
 	/* Keep the laser running on Fiber adapters */
 	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
 	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
 		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
 		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
 		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
 	}
 
 	/*
 	** Determine type of Wakeup: note that wol
 	** is set with all bits on by default.
 	*/
 	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
 		adapter->wol &= ~E1000_WUFC_MAG;
 
 	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
 		adapter->wol &= ~E1000_WUFC_MC;
 	else {
 		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
 		rctl |= E1000_RCTL_MPE;
 		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
 	}
 
 	if ((adapter->hw.mac.type == e1000_pchlan) ||
 	    (adapter->hw.mac.type == e1000_pch2lan)) {
 		if (em_enable_phy_wakeup(adapter))
 			return;
 	} else {
 		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
 		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
 	}
 
 	if (adapter->hw.phy.type == e1000_phy_igp_3)
 		e1000_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
 
         /* Request PME */
         status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
 	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
 	if (if_getcapenable(ifp) & IFCAP_WOL)
 		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
         pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
 
 	return;
 }
 
 /*
 ** WOL in the newer chipset interfaces (pchlan)
 ** require thing to be copied into the phy
 */
 static int
 em_enable_phy_wakeup(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 mreg, ret = 0;
 	u16 preg;
 
 	/* copy MAC RARs to PHY RARs */
 	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
 
 	/* copy MAC MTA to PHY MTA */
 	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
 		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
 		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
 		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
 		    (u16)((mreg >> 16) & 0xFFFF));
 	}
 
 	/* configure PHY Rx Control register */
 	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
 	mreg = E1000_READ_REG(hw, E1000_RCTL);
 	if (mreg & E1000_RCTL_UPE)
 		preg |= BM_RCTL_UPE;
 	if (mreg & E1000_RCTL_MPE)
 		preg |= BM_RCTL_MPE;
 	preg &= ~(BM_RCTL_MO_MASK);
 	if (mreg & E1000_RCTL_MO_3)
 		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
 				<< BM_RCTL_MO_SHIFT);
 	if (mreg & E1000_RCTL_BAM)
 		preg |= BM_RCTL_BAM;
 	if (mreg & E1000_RCTL_PMCF)
 		preg |= BM_RCTL_PMCF;
 	mreg = E1000_READ_REG(hw, E1000_CTRL);
 	if (mreg & E1000_CTRL_RFCE)
 		preg |= BM_RCTL_RFCE;
 	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);
 
 	/* enable PHY wakeup in MAC register */
 	E1000_WRITE_REG(hw, E1000_WUC,
 	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
 	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);
 
 	/* configure and enable PHY wakeup in PHY registers */
 	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
 	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);
 
 	/* activate PHY wakeup */
 	ret = hw->phy.ops.acquire(hw);
 	if (ret) {
 		printf("Could not acquire PHY\n");
 		return ret;
 	}
 	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
 	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
 	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
 	if (ret) {
 		printf("Could not read PHY page 769\n");
 		goto out;
 	}
 	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
 	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
 	if (ret)
 		printf("Could not set PHY Host Wakeup bit\n");
 out:
 	hw->phy.ops.release(hw);
 
 	return ret;
 }
 
 static void
 em_led_func(void *arg, int onoff)
 {
 	struct adapter	*adapter = arg;
  
 	EM_CORE_LOCK(adapter);
 	if (onoff) {
 		e1000_setup_led(&adapter->hw);
 		e1000_led_on(&adapter->hw);
 	} else {
 		e1000_led_off(&adapter->hw);
 		e1000_cleanup_led(&adapter->hw);
 	}
 	EM_CORE_UNLOCK(adapter);
 }
 
 /*
 ** Disable the L0S and L1 LINK states
 */
 static void
 em_disable_aspm(struct adapter *adapter)
 {
 	int		base, reg;
 	u16		link_cap,link_ctrl;
 	device_t	dev = adapter->dev;
 
 	switch (adapter->hw.mac.type) {
 		case e1000_82573:
 		case e1000_82574:
 		case e1000_82583:
 			break;
 		default:
 			return;
 	}
 	if (pci_find_cap(dev, PCIY_EXPRESS, &base) != 0)
 		return;
 	reg = base + PCIER_LINK_CAP;
 	link_cap = pci_read_config(dev, reg, 2);
 	if ((link_cap & PCIEM_LINK_CAP_ASPM) == 0)
 		return;
 	reg = base + PCIER_LINK_CTL;
 	link_ctrl = pci_read_config(dev, reg, 2);
 	link_ctrl &= ~PCIEM_LINK_CTL_ASPMC;
 	pci_write_config(dev, reg, link_ctrl, 2);
 	return;
 }
 
 /**********************************************************************
  *
  *  Update the board statistics counters.
  *
  **********************************************************************/
 static void
 em_update_stats_counters(struct adapter *adapter)
 {
 
 	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
 	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
 		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
 		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
 	}
 	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
 	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
 	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
 	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
 
 	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
 	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
 	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
 	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
 	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
 	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
 	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
 	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
 	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
 	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
 	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
 	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
 	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
 	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
 	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
 	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
 	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
 	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
 	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
 	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
 
 	/* For the 64-bit byte counters the low dword must be read first. */
 	/* Both registers clear on the read of the high dword */
 
 	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
 	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
 	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
 
 	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
 	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
 	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
 	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
 	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
 
 	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
 	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
 
 	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
 	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
 	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
 	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
 	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
 	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
 	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
 	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
 	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
 	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
 
 	/* Interrupt Counts */
 
 	adapter->stats.iac += E1000_READ_REG(&adapter->hw, E1000_IAC);
 	adapter->stats.icrxptc += E1000_READ_REG(&adapter->hw, E1000_ICRXPTC);
 	adapter->stats.icrxatc += E1000_READ_REG(&adapter->hw, E1000_ICRXATC);
 	adapter->stats.ictxptc += E1000_READ_REG(&adapter->hw, E1000_ICTXPTC);
 	adapter->stats.ictxatc += E1000_READ_REG(&adapter->hw, E1000_ICTXATC);
 	adapter->stats.ictxqec += E1000_READ_REG(&adapter->hw, E1000_ICTXQEC);
 	adapter->stats.ictxqmtc += E1000_READ_REG(&adapter->hw, E1000_ICTXQMTC);
 	adapter->stats.icrxdmtc += E1000_READ_REG(&adapter->hw, E1000_ICRXDMTC);
 	adapter->stats.icrxoc += E1000_READ_REG(&adapter->hw, E1000_ICRXOC);
 
 	if (adapter->hw.mac.type >= e1000_82543) {
 		adapter->stats.algnerrc += 
 		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
 		adapter->stats.rxerrc += 
 		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
 		adapter->stats.tncrs += 
 		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
 		adapter->stats.cexterr += 
 		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
 		adapter->stats.tsctc += 
 		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
 		adapter->stats.tsctfc += 
 		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
 	}
 }
 
 static uint64_t
 em_get_counter(if_t ifp, ift_counter cnt)
 {
 	struct adapter *adapter;
 
 	adapter = if_getsoftc(ifp);
 
 	switch (cnt) {
 	case IFCOUNTER_COLLISIONS:
 		return (adapter->stats.colc);
 	case IFCOUNTER_IERRORS:
 		return (adapter->dropped_pkts + adapter->stats.rxerrc +
 		    adapter->stats.crcerrs + adapter->stats.algnerrc +
 		    adapter->stats.ruc + adapter->stats.roc +
 		    adapter->stats.mpc + adapter->stats.cexterr);
 	case IFCOUNTER_OERRORS:
 		return (adapter->stats.ecol + adapter->stats.latecol +
 		    adapter->watchdog_events);
 	default:
 		return (if_get_counter_default(ifp, cnt));
 	}
 }
 
 /* Export a single 32-bit register via a read-only sysctl. */
 static int
 em_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter;
 	u_int val;
 
 	adapter = oidp->oid_arg1;
 	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
 	return (sysctl_handle_int(oidp, &val, 0, req));
 }
 
 /*
  * Add sysctl variables, one per statistic, to the system.
  */
 static void
 em_add_hw_stats(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 
 	struct tx_ring *txr = adapter->tx_rings;
 	struct rx_ring *rxr = adapter->rx_rings;
 
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
 	struct e1000_hw_stats *stats = &adapter->stats;
 
 	struct sysctl_oid *stat_node, *queue_node, *int_node;
 	struct sysctl_oid_list *stat_list, *queue_list, *int_list;
 
 #define QUEUE_NAME_LEN 32
 	char namebuf[QUEUE_NAME_LEN];
 	
 	/* Driver Statistics */
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
 			CTLFLAG_RD, &adapter->link_irq,
 			"Link MSIX IRQ Handled");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_fail", 
 			 CTLFLAG_RD, &adapter->mbuf_alloc_failed,
 			 "Std mbuf failed");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail", 
 			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
 			 "Std mbuf cluster failed");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", 
 			CTLFLAG_RD, &adapter->dropped_pkts,
 			"Driver dropped packets");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail", 
 			CTLFLAG_RD, &adapter->no_tx_dma_setup,
 			"Driver tx dma failure in xmit");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
 			CTLFLAG_RD, &adapter->rx_overruns,
 			"RX overruns");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
 			CTLFLAG_RD, &adapter->watchdog_events,
 			"Watchdog timeouts");
 	
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
 			em_sysctl_reg_handler, "IU",
 			"Device Control Register");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
 			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
 			em_sysctl_reg_handler, "IU",
 			"Receiver Control Register");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
 			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
 			"Flow Control High Watermark");
 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water", 
 			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
 			"Flow Control Low Watermark");
 
 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_tx_%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 					    CTLFLAG_RD, NULL, "TX Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", 
 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
 				E1000_TDH(txr->me),
 				em_sysctl_reg_handler, "IU",
  				"Transmit Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", 
 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
 				E1000_TDT(txr->me),
 				em_sysctl_reg_handler, "IU",
  				"Transmit Descriptor Tail");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tx_irq",
 				CTLFLAG_RD, &txr->tx_irq,
 				"Queue MSI-X Transmit Interrupts");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_desc_avail", 
 				CTLFLAG_RD, &txr->no_desc_avail,
 				"Queue No Descriptor Available");
 
 		snprintf(namebuf, QUEUE_NAME_LEN, "queue_rx_%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 					    CTLFLAG_RD, NULL, "RX Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", 
 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
 				E1000_RDH(rxr->me),
 				em_sysctl_reg_handler, "IU",
 				"Receive Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", 
 				CTLTYPE_UINT | CTLFLAG_RD, adapter,
 				E1000_RDT(rxr->me),
 				em_sysctl_reg_handler, "IU",
 				"Receive Descriptor Tail");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "rx_irq",
 				CTLFLAG_RD, &rxr->rx_irq,
 				"Queue MSI-X Receive Interrupts");
 	}
 
 	/* MAC stats get their own sub node */
 
 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", 
 				    CTLFLAG_RD, NULL, "Statistics");
 	stat_list = SYSCTL_CHILDREN(stat_node);
 
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
 			CTLFLAG_RD, &stats->ecol,
 			"Excessive collisions");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
 			CTLFLAG_RD, &stats->scc,
 			"Single collisions");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
 			CTLFLAG_RD, &stats->mcc,
 			"Multiple collisions");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
 			CTLFLAG_RD, &stats->latecol,
 			"Late collisions");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
 			CTLFLAG_RD, &stats->colc,
 			"Collision Count");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
 			CTLFLAG_RD, &adapter->stats.symerrs,
 			"Symbol Errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
 			CTLFLAG_RD, &adapter->stats.sec,
 			"Sequence Errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
 			CTLFLAG_RD, &adapter->stats.dc,
 			"Defer Count");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
 			CTLFLAG_RD, &adapter->stats.mpc,
 			"Missed Packets");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
 			CTLFLAG_RD, &adapter->stats.rnbc,
 			"Receive No Buffers");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
 			CTLFLAG_RD, &adapter->stats.ruc,
 			"Receive Undersize");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
 			CTLFLAG_RD, &adapter->stats.rfc,
 			"Fragmented Packets Received ");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
 			CTLFLAG_RD, &adapter->stats.roc,
 			"Oversized Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
 			CTLFLAG_RD, &adapter->stats.rjc,
 			"Recevied Jabber");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
 			CTLFLAG_RD, &adapter->stats.rxerrc,
 			"Receive Errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
 			CTLFLAG_RD, &adapter->stats.crcerrs,
 			"CRC errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
 			CTLFLAG_RD, &adapter->stats.algnerrc,
 			"Alignment Errors");
 	/* On 82575 these are collision counts */
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
 			CTLFLAG_RD, &adapter->stats.cexterr,
 			"Collision/Carrier extension errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
 			CTLFLAG_RD, &adapter->stats.xonrxc,
 			"XON Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
 			CTLFLAG_RD, &adapter->stats.xontxc,
 			"XON Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
 			CTLFLAG_RD, &adapter->stats.xoffrxc,
 			"XOFF Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
 			CTLFLAG_RD, &adapter->stats.xofftxc,
 			"XOFF Transmitted");
 
 	/* Packet Reception Stats */
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
 			CTLFLAG_RD, &adapter->stats.tpr,
 			"Total Packets Received ");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
 			CTLFLAG_RD, &adapter->stats.gprc,
 			"Good Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
 			CTLFLAG_RD, &adapter->stats.bprc,
 			"Broadcast Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
 			CTLFLAG_RD, &adapter->stats.mprc,
 			"Multicast Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
 			CTLFLAG_RD, &adapter->stats.prc64,
 			"64 byte frames received ");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
 			CTLFLAG_RD, &adapter->stats.prc127,
 			"65-127 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
 			CTLFLAG_RD, &adapter->stats.prc255,
 			"128-255 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
 			CTLFLAG_RD, &adapter->stats.prc511,
 			"256-511 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
 			CTLFLAG_RD, &adapter->stats.prc1023,
 			"512-1023 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
 			CTLFLAG_RD, &adapter->stats.prc1522,
 			"1023-1522 byte frames received");
  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
  			CTLFLAG_RD, &adapter->stats.gorc, 
  			"Good Octets Received"); 
 
 	/* Packet Transmission Stats */
  	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
  			CTLFLAG_RD, &adapter->stats.gotc, 
  			"Good Octets Transmitted"); 
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
 			CTLFLAG_RD, &adapter->stats.tpt,
 			"Total Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
 			CTLFLAG_RD, &adapter->stats.gptc,
 			"Good Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
 			CTLFLAG_RD, &adapter->stats.bptc,
 			"Broadcast Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
 			CTLFLAG_RD, &adapter->stats.mptc,
 			"Multicast Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
 			CTLFLAG_RD, &adapter->stats.ptc64,
 			"64 byte frames transmitted ");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
 			CTLFLAG_RD, &adapter->stats.ptc127,
 			"65-127 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
 			CTLFLAG_RD, &adapter->stats.ptc255,
 			"128-255 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
 			CTLFLAG_RD, &adapter->stats.ptc511,
 			"256-511 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
 			CTLFLAG_RD, &adapter->stats.ptc1023,
 			"512-1023 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
 			CTLFLAG_RD, &adapter->stats.ptc1522,
 			"1024-1522 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
 			CTLFLAG_RD, &adapter->stats.tsctc,
 			"TSO Contexts Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
 			CTLFLAG_RD, &adapter->stats.tsctfc,
 			"TSO Contexts Failed");
 
 
 	/* Interrupt Stats */
 
 	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts", 
 				    CTLFLAG_RD, NULL, "Interrupt Statistics");
 	int_list = SYSCTL_CHILDREN(int_node);
 
 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
 			CTLFLAG_RD, &adapter->stats.iac,
 			"Interrupt Assertion Count");
 
 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
 			CTLFLAG_RD, &adapter->stats.icrxptc,
 			"Interrupt Cause Rx Pkt Timer Expire Count");
 
 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
 			CTLFLAG_RD, &adapter->stats.icrxatc,
 			"Interrupt Cause Rx Abs Timer Expire Count");
 
 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
 			CTLFLAG_RD, &adapter->stats.ictxptc,
 			"Interrupt Cause Tx Pkt Timer Expire Count");
 
 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
 			CTLFLAG_RD, &adapter->stats.ictxatc,
 			"Interrupt Cause Tx Abs Timer Expire Count");
 
 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
 			CTLFLAG_RD, &adapter->stats.ictxqec,
 			"Interrupt Cause Tx Queue Empty Count");
 
 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
 			CTLFLAG_RD, &adapter->stats.ictxqmtc,
 			"Interrupt Cause Tx Queue Min Thresh Count");
 
 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
 			CTLFLAG_RD, &adapter->stats.icrxdmtc,
 			"Interrupt Cause Rx Desc Min Thresh Count");
 
 	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
 			CTLFLAG_RD, &adapter->stats.icrxoc,
 			"Interrupt Cause Receiver Overrun Count");
 }
 
 /**********************************************************************
  *
  *  This routine provides a way to dump out the adapter eeprom,
  *  often a useful debug/service tool. This only dumps the first
  *  32 words, stuff that matters is in that extent.
  *
  **********************************************************************/
 static int
 em_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *)arg1;
 	int error;
 	int result;
 
 	result = -1;
 	error = sysctl_handle_int(oidp, &result, 0, req);
 
 	if (error || !req->newptr)
 		return (error);
 
 	/*
 	 * This value will cause a hex dump of the
 	 * first 32 16-bit words of the EEPROM to
 	 * the screen.
 	 */
 	if (result == 1)
 		em_print_nvm_info(adapter);
 
 	return (error);
 }
 
 /*
  * Hex-dump the first 32 NVM words, eight per row, each row prefixed
  * with an offset label.
  */
 static void
 em_print_nvm_info(struct adapter *adapter)
 {
 	u16	word;
 	int	idx;
 
 	/* It's a bit crude, but it gets the job done */
 	printf("\nInterface EEPROM Dump:\n");
 	printf("Offset\n0x0000  ");
 	for (idx = 0; idx < 32; idx++) {
 		/* Every eighth word starts a new, offset-labelled row. */
 		if (idx != 0 && (idx % 8) == 0)
 			printf("\n0x00%x0  ", idx / 8);
 		e1000_read_nvm(&adapter->hw, idx, 1, &word);
 		printf("%04x ", word);
 	}
 	printf("\n");
 }
 
 /*
  * Sysctl handler for one interrupt-delay register.
  *
  * arg1 is the struct em_int_delay_info registered by
  * em_add_int_delay_sysctl().  A read reports info->value (microseconds).
  * A write validates the new value, stores it in info->value, converts it
  * to ticks and writes it into the low 16 bits of the register at
  * info->offset while holding the core lock.
  *
  * Returns the error from sysctl_handle_int(), EINVAL when the value is
  * negative or exceeds EM_TICKS_TO_USECS(65535), and 0 on success.
  */
 static int
 em_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
 {
 	struct em_int_delay_info *info;
 	struct adapter *adapter;
 	u32 regval;
 	int error, usecs, ticks;
 
 	info = (struct em_int_delay_info *)arg1;
 	usecs = info->value;
 	error = sysctl_handle_int(oidp, &usecs, 0, req);
 	/* Nothing more to do on error or when no new value was supplied. */
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	/* The register field is 16 bits wide, so cap at 65535 ticks. */
 	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
 		return (EINVAL);
 	info->value = usecs;
 	ticks = EM_USECS_TO_TICKS(usecs);
 	if (info->offset == E1000_ITR)	/* units are 256ns here */
 		ticks *= 4;
 
 	adapter = info->adapter;
 	
 	EM_CORE_LOCK(adapter);
 	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
 	/* Keep the upper 16 bits as read; replace only the low 16 bits. */
 	regval = (regval & ~0xffff) | (ticks & 0xffff);
 	/* Handle a few special cases. */
 	switch (info->offset) {
 	case E1000_RDTR:
 		break;
 	case E1000_TIDV:
 		if (ticks == 0) {
 			/* Zero delay: stop setting the IDE bit in txd_cmd. */
 			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
 			/* Don't write 0 into the TIDV register. */
 			regval++;
 		} else
 			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
 		break;
 	}
 	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
 	EM_CORE_UNLOCK(adapter);
 	return (0);
 }
 
 static void
 em_add_int_delay_sysctl(struct adapter *adapter, const char *name,
 	const char *description, struct em_int_delay_info *info,
 	int offset, int value)
 {
 	info->adapter = adapter;
 	info->offset = offset;
 	info->value = value;
 	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
 	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
 	    info, 0, em_sysctl_int_delay, "I", description);
 }
 
 static void
 em_set_sysctl_value(struct adapter *adapter, const char *name,
 	const char *description, int *limit, int value)
 {
 	*limit = value;
 	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
 	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
 	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
 }
 
 
 /*
 ** Set flow control using sysctl:
 ** Flow control values:
 **      0 - off
 **      1 - rx pause
 **      2 - tx pause
 **      3 - full
 */
 static int
 em_set_flowcntl(SYSCTL_HANDLER_ARGS)
 {
 	/*
 	 * NOTE(review): function-scope static, so this value is shared by
 	 * every caller of the handler and persists between calls.
 	 */
 	static int	requested = 3; /* default is full */
 	struct adapter	*sc = (struct adapter *)arg1;
 	int		rc;
 
 	rc = sysctl_handle_int(oidp, &requested, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	/* Nothing to do when the mode is unchanged. */
 	if (requested == sc->fc)
 		return (rc);
 
 	/* Unrecognized modes are silently ignored. */
 	switch (requested) {
 	case e1000_fc_none:
 	case e1000_fc_rx_pause:
 	case e1000_fc_tx_pause:
 	case e1000_fc_full:
 		break;
 	default:
 		return (rc);
 	}
 
 	sc->hw.fc.requested_mode = requested;
 	sc->fc = requested;
 	sc->hw.fc.current_mode = sc->hw.fc.requested_mode;
 	e1000_force_mac_fc(&sc->hw);
 	return (rc);
 }
 
 /*
 ** Manage Energy Efficient Ethernet:
 ** Control values:
 **     0/1 - enabled/disabled
 */
 static int
 em_sysctl_eee(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter	*sc = (struct adapter *)arg1;
 	int		disable, rc;
 
 	disable = sc->hw.dev_spec.ich8lan.eee_disable;
 	rc = sysctl_handle_int(oidp, &disable, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	/* Store the flag normalized to 0/1 and re-run init under the lock. */
 	EM_CORE_LOCK(sc);
 	sc->hw.dev_spec.ich8lan.eee_disable = (disable != 0);
 	em_init_locked(sc);
 	EM_CORE_UNLOCK(sc);
 	return (0);
 }
 
 static int
 em_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter;
 	int error;
 	int result;
 
 	result = -1;
 	error = sysctl_handle_int(oidp, &result, 0, req);
 
 	if (error || !req->newptr)
 		return (error);
 
 	if (result == 1) {
 		adapter = (struct adapter *)arg1;
 		em_print_debug_info(adapter);
         }
 
 	return (error);
 }
 
 /*
 ** This routine is meant to be fluid, add whatever is
 ** needed for debugging a problem.  -jfv
 **
 ** Prints the interface RUNNING/OACTIVE state, then for every queue the
 ** hardware head/tail registers and the driver's ring bookkeeping.
 */
 static void
 em_print_debug_info(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	struct tx_ring *txr = adapter->tx_rings;
 	struct rx_ring *rxr = adapter->rx_rings;
 
 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING)
 		printf("Interface is RUNNING ");
 	else
 		printf("Interface is NOT RUNNING\n");
 
 	if (if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE)
 		printf("and INACTIVE\n");
 	else
 		printf("and ACTIVE\n");
 
 	/* The TX and RX ring arrays are walked in lock step, one per queue. */
 	for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) {
 		device_printf(dev, "TX Queue %d ------\n", i);
 		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
 		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
 		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
 		device_printf(dev, "Tx Queue Status = %d\n", txr->busy);
 		device_printf(dev, "TX descriptors avail = %d\n",
 		    txr->tx_avail);
 		/* no_desc_avail is unsigned long, hence %lu. */
 		device_printf(dev, "Tx Descriptors avail failure = %lu\n",
 		    txr->no_desc_avail);
 		device_printf(dev, "RX Queue %d ------\n", i);
 		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
 		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
 		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
 		/* rx_discarded is unsigned long, hence %lu. */
 		device_printf(dev, "RX discarded packets = %lu\n",
 		    rxr->rx_discarded);
 		/* next_to_check / next_to_refresh are u32, hence %u. */
 		device_printf(dev, "RX Next to Check = %u\n",
 		    rxr->next_to_check);
 		device_printf(dev, "RX Next to Refresh = %u\n",
 		    rxr->next_to_refresh);
 	}
 }
 
 #ifdef EM_MULTIQUEUE
 /*
  * 82574 only:
  * Write a new value to the EEPROM increasing the number of MSIX
  * vectors from 3 to 5, for proper multiqueue support.
  */
 /*
  * Read the NVM word at EM_NVM_PCIE_CTRL and, if its MSIX_N field is not
  * already 4, rewrite the field to 4 and update the NVM checksum.
  * NOTE(review): the return values of the NVM read/write/checksum calls
  * are not checked here.
  */
 static void
 em_enable_vectors_82574(struct adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	device_t dev = adapter->dev;
 	u16 edata;
 
 	e1000_read_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
 	printf("Current cap: %#06x\n", edata);
 	/*
 	 * Field value 4 goes with the "5 vectors" named in the message
 	 * below (presumably the field holds count minus one -- confirm).
 	 */
 	if (((edata & EM_NVM_MSIX_N_MASK) >> EM_NVM_MSIX_N_SHIFT) != 4) {
 		device_printf(dev, "Writing to eeprom: increasing "
 		    "reported MSIX vectors from 3 to 5...\n");
 		/* Clear the field, then set it to 4. */
 		edata &= ~(EM_NVM_MSIX_N_MASK);
 		edata |= 4 << EM_NVM_MSIX_N_SHIFT;
 		e1000_write_nvm(hw, EM_NVM_PCIE_CTRL, 1, &edata);
 		e1000_update_nvm_checksum(hw);
 		device_printf(dev, "Writing to eeprom: done\n");
 	}
 }
 #endif
 
 #ifdef DDB
 DB_COMMAND(em_reset_dev, em_ddb_reset_dev)
 {
 	devclass_t	dc;
 	int max_em;
 
 	dc = devclass_find("em");
 	max_em = devclass_get_maxunit(dc);
 
 	for (int index = 0; index < (max_em - 1); index++) {
 		device_t dev;
 		dev = devclass_get_device(dc, index);
 		if (device_get_driver(dev) == &em_driver) {
 			struct adapter *adapter = device_get_softc(dev);
 			EM_CORE_LOCK(adapter);
 			em_init_locked(adapter);
 			EM_CORE_UNLOCK(adapter);
 		}
 	}
 }
 DB_COMMAND(em_dump_queue, em_ddb_dump_queue)
 {
 	devclass_t	dc;
 	int max_em;
 
 	dc = devclass_find("em");
 	max_em = devclass_get_maxunit(dc);
 
 	for (int index = 0; index < (max_em - 1); index++) {
 		device_t dev;
 		dev = devclass_get_device(dc, index);
 		if (device_get_driver(dev) == &em_driver)
 			em_print_debug_info(device_get_softc(dev));
 	}
 
 }
 #endif
Index: projects/release-pkg/sys/dev/e1000/if_em.h
===================================================================
--- projects/release-pkg/sys/dev/e1000/if_em.h	(revision 293335)
+++ projects/release-pkg/sys/dev/e1000/if_em.h	(revision 293336)
@@ -1,544 +1,556 @@
 /******************************************************************************
 
   Copyright (c) 2001-2015, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 
 #ifndef _EM_H_DEFINED_
 #define _EM_H_DEFINED_
 
 
 /* Tunables */
 
 /*
  * EM_TXD: Maximum number of Transmit Descriptors
  * Valid Range: 80-256 for 82542 and 82543-based adapters
  *              80-4096 for others
  * Default Value: 1024 (4096 when EM_MULTIQUEUE is defined)
  *   This value is the number of transmit descriptors allocated by the driver.
  *   Increasing this value allows the driver to queue more transmits. Each
  *   descriptor is 16 bytes.
  *   Since TDLEN should be a multiple of 128 bytes, the number of transmit
  *   descriptors should meet the following condition:
  *      (num_tx_desc * sizeof(struct e1000_tx_desc)) % 128 == 0
  */
 #define EM_MIN_TXD		80
 #define EM_MAX_TXD		4096
 #ifdef EM_MULTIQUEUE
 #define EM_DEFAULT_TXD		4096
 #else
 #define EM_DEFAULT_TXD		1024
 #endif
 
 /*
  * EM_RXD - Maximum number of receive Descriptors
  * Valid Range: 80-256 for 82542 and 82543-based adapters
  *              80-4096 for others
  * Default Value: 1024 (4096 when EM_MULTIQUEUE is defined)
  *   This value is the number of receive descriptors allocated by the driver.
  *   Increasing this value allows the driver to buffer more incoming packets.
  *   Each descriptor is 16 bytes.  A receive buffer is also allocated for each
  *   descriptor. The maximum MTU size is 16110.
  *   Since RDLEN should be a multiple of 128 bytes, the number of receive
  *   descriptors should meet the following condition:
  *      (num_rx_desc * 16) % 128 == 0
  */
 #define EM_MIN_RXD		80
 #define EM_MAX_RXD		4096
 #ifdef EM_MULTIQUEUE
 #define EM_DEFAULT_RXD		4096
 #else
 #define EM_DEFAULT_RXD		1024
 #endif
 
 /*
  * EM_TIDV - Transmit Interrupt Delay Value
  * Valid Range: 0-65535 (0=off)
  * Default Value: 64
  *   This value delays the generation of transmit interrupts in units of
  *   1.024 microseconds. Transmit interrupt reduction can improve CPU
  *   efficiency if properly tuned for specific network traffic. If the
  *   system is reporting dropped transmits, this value may be set too high
  *   causing the driver to run out of available transmit descriptors.
  */
 #define EM_TIDV                         64
 
 /*
  * EM_TADV - Transmit Absolute Interrupt Delay Value
  * (Not valid for 82542/82543/82544)
  * Valid Range: 0-65535 (0=off)
  * Default Value: 64
  *   This value, in units of 1.024 microseconds, limits the delay in which a
  *   transmit interrupt is generated. Useful only if EM_TIDV is non-zero,
  *   this value ensures that an interrupt is generated after the initial
  *   packet is sent on the wire within the set amount of time.  Proper tuning,
  *   along with EM_TIDV, may improve traffic throughput in specific
  *   network conditions.
  */
 #define EM_TADV                         64
 
 /*
  * EM_RDTR - Receive Interrupt Delay Timer (Packet Timer)
  * Valid Range: 0-65535 (0=off)
  * Default Value: 0
  *   This value delays the generation of receive interrupts in units of 1.024
  *   microseconds.  Receive interrupt reduction can improve CPU efficiency if
  *   properly tuned for specific network traffic. Increasing this value adds
  *   extra latency to frame reception and can end up decreasing the throughput
  *   of TCP traffic. If the system is reporting dropped receives, this value
  *   may be set too high, causing the driver to run out of available receive
  *   descriptors.
  *
  *   CAUTION: When setting EM_RDTR to a value other than 0, adapters
  *            may hang (stop transmitting) under certain network conditions.
  *            If this occurs a WATCHDOG message is logged in the system
  *            event log. In addition, the controller is automatically reset,
  *            restoring the network connection. To eliminate the potential
  *            for the hang ensure that EM_RDTR is set to 0.
  */
 #ifdef EM_MULTIQUEUE
 #define EM_RDTR                         64
 #else
 #define EM_RDTR                         0
 #endif
 
 /*
  * Receive Interrupt Absolute Delay Timer (Not valid for 82542/82543/82544)
  * Valid Range: 0-65535 (0=off)
  * Default Value: 64
  *   This value, in units of 1.024 microseconds, limits the delay in which a
  *   receive interrupt is generated. Useful only if EM_RDTR is non-zero,
  *   this value ensures that an interrupt is generated after the initial
  *   packet is received within the set amount of time.  Proper tuning,
  *   along with EM_RDTR, may improve traffic throughput in specific network
  *   conditions.
  */
 #ifdef EM_MULTIQUEUE
 #define EM_RADV                         128
 #else
 #define EM_RADV                         64
 #endif
 
 /*
  * This parameter controls the max duration of transmit watchdog.
  */
 #define EM_WATCHDOG                   (10 * hz)
 
 /*
  * This parameter controls when the driver calls the routine to reclaim
  * transmit descriptors.
  */
 #define EM_TX_CLEANUP_THRESHOLD	(adapter->num_tx_desc / 8)
 
 /*
  * This parameter controls whether or not autonegotiation is enabled.
  *              0 - Disable autonegotiation
  *              1 - Enable  autonegotiation
  */
 #define DO_AUTO_NEG                     1
 
 /*
  * This parameter controls whether or not the driver will wait for
  * autonegotiation to complete.
  *              1 - Wait for autonegotiation to complete
  *              0 - Don't wait for autonegotiation to complete
  */
 #define WAIT_FOR_AUTO_NEG_DEFAULT       0
 
 /* Tunables -- End */
 
 #define AUTONEG_ADV_DEFAULT	(ADVERTISE_10_HALF | ADVERTISE_10_FULL | \
 				ADVERTISE_100_HALF | ADVERTISE_100_FULL | \
 				ADVERTISE_1000_FULL)
 
 #define AUTO_ALL_MODES		0
 
 /* PHY master/slave setting */
 #define EM_MASTER_SLAVE		e1000_ms_hw_default
 
 /*
  * Miscellaneous constants
  */
 #define EM_VENDOR_ID                    0x8086
 #define EM_FLASH                        0x0014 
 
 #define EM_JUMBO_PBA                    0x00000028
 #define EM_DEFAULT_PBA                  0x00000030
 #define EM_SMARTSPEED_DOWNSHIFT         3
 #define EM_SMARTSPEED_MAX               15
 #define EM_MAX_LOOP			10
 
 #define MAX_NUM_MULTICAST_ADDRESSES     128
 #define PCI_ANY_ID                      (~0U)
 #define ETHER_ALIGN                     2
 #define EM_FC_PAUSE_TIME		0x0680
 /*
  * APME bit masks.  Defined without a trailing ';' so they can be used
  * inside expressions as well as on the right-hand side of an assignment.
  */
 #define EM_EEPROM_APME			0x400
 #define EM_82544_APME			0x0004
 
 /*
  * Driver state logic for the detection of a hung state
  * in hardware.  Set TX_HUNG whenever a TX packet is used
  * (data is sent) and clear it when txeof() is invoked if
  * any descriptors from the ring are cleaned/reclaimed.
  * Increment internal counter if no descriptors are cleaned
  * and compare to TX_MAXTRIES.  When counter > TX_MAXTRIES,
  * reset adapter.
  */
 #define EM_TX_IDLE			0x00000000
 #define EM_TX_BUSY			0x00000001
 #define EM_TX_HUNG			0x80000000
 #define EM_TX_MAXTRIES			10
 
 /*
  * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
  * multiple of 128 bytes. So we align TDBA/RDBA on 128 byte boundary. This will
  * also optimize cache line size effect. H/W supports up to cache line size 128.
  */
 #define EM_DBA_ALIGN			128
 
 /*
  * See Intel 82574 Driver Programming Interface Manual, Section 10.2.6.9
  */
 #define TARC_COMPENSATION_MODE	(1 << 7)	/* Compensation Mode */
 #define TARC_SPEED_MODE_BIT 	(1 << 21)	/* On PCI-E MACs only */
 /*
  * Bits 23-25.  The whole expansion is parenthesized so the mask keeps
  * its value when used next to operators such as & or ~.
  */
 #define TARC_MQ_FIX		((1 << 23) | \
 				 (1 << 24) | \
 				 (1 << 25))	/* Handle errata in MQ mode */
 #define TARC_ERRATA_BIT 	(1 << 26)	/* Note from errata on 82574 */
 
 /* PCI Config defines */
 #define EM_BAR_TYPE(v)		((v) & EM_BAR_TYPE_MASK)
 #define EM_BAR_TYPE_MASK	0x00000001
 #define EM_BAR_TYPE_MMEM	0x00000000
 #define EM_BAR_TYPE_FLASH	0x0014 
 #define EM_BAR_MEM_TYPE(v)	((v) & EM_BAR_MEM_TYPE_MASK)
 #define EM_BAR_MEM_TYPE_MASK	0x00000006
 #define EM_BAR_MEM_TYPE_32BIT	0x00000000
 #define EM_BAR_MEM_TYPE_64BIT	0x00000004
 #define EM_MSIX_BAR		3	/* On 82575 */
 
 /* More backward compatibility */
 #if __FreeBSD_version < 900000
 #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD
 #endif
 
 /* Defines for printing debug information */
 #define DEBUG_INIT  0
 #define DEBUG_IOCTL 0
 #define DEBUG_HW    0
 
 /*
  * Each macro prints its format string plus a newline when the matching
  * DEBUG_* switch is non-zero.  The do { } while (0) wrapper makes every
  * expansion a single statement, so a macro used as the unbraced body of
  * an if cannot capture the else that follows it.
  */
 #define INIT_DEBUGOUT(S) \
 	do { if (DEBUG_INIT)  printf(S "\n"); } while (0)
 #define INIT_DEBUGOUT1(S, A) \
 	do { if (DEBUG_INIT)  printf(S "\n", A); } while (0)
 #define INIT_DEBUGOUT2(S, A, B) \
 	do { if (DEBUG_INIT)  printf(S "\n", A, B); } while (0)
 #define IOCTL_DEBUGOUT(S) \
 	do { if (DEBUG_IOCTL) printf(S "\n"); } while (0)
 #define IOCTL_DEBUGOUT1(S, A) \
 	do { if (DEBUG_IOCTL) printf(S "\n", A); } while (0)
 #define IOCTL_DEBUGOUT2(S, A, B) \
 	do { if (DEBUG_IOCTL) printf(S "\n", A, B); } while (0)
 #define HW_DEBUGOUT(S) \
 	do { if (DEBUG_HW) printf(S "\n"); } while (0)
 #define HW_DEBUGOUT1(S, A) \
 	do { if (DEBUG_HW) printf(S "\n", A); } while (0)
 #define HW_DEBUGOUT2(S, A, B) \
 	do { if (DEBUG_HW) printf(S "\n", A, B); } while (0)
 
 #define EM_MAX_SCATTER		64
 #define EM_VFTA_SIZE		128
 #define EM_TSO_SIZE		(65535 + sizeof(struct ether_vlan_header))
 #define EM_TSO_SEG_SIZE		4096	/* Max dma segment size */
 #define EM_MSIX_MASK		0x01F00000 /* For 82574 use */
 #define EM_MSIX_LINK		0x01000000 /* For 82574 use */
 #define ETH_ZLEN		60
 #define ETH_ADDR_LEN		6
 #define CSUM_OFFLOAD		7	/* Offload bits in mbuf flag */
 
 /*
  * 82574 has a nonstandard address for EIAC
  * and since it's only used in MSIX, and in
  * the em driver only 82574 uses MSIX we can
  * solve it just using this define.
  */
 #define EM_EIAC 0x000DC
 /*
  * 82574 only reports 3 MSI-X vectors by default;
  * defines assisting with making it report 5 are
  * located here.
  */
 #define EM_NVM_PCIE_CTRL	0x1B
 #define EM_NVM_MSIX_N_MASK	(0x7 << EM_NVM_MSIX_N_SHIFT)
 #define EM_NVM_MSIX_N_SHIFT	7
 
 /*
  * Bus dma allocation structure used by
  * e1000_dma_malloc and e1000_dma_free.
  */
 /* Embedded in each ring as txdma (struct tx_ring) / rxdma (struct rx_ring). */
 struct em_dma_alloc {
         bus_addr_t              dma_paddr;	/* presumably the bus address of the area -- confirm */
         caddr_t                 dma_vaddr;	/* presumably the kernel virtual address -- confirm */
         bus_dma_tag_t           dma_tag;
         bus_dmamap_t            dma_map;
         bus_dma_segment_t       dma_seg;
         int                     dma_nseg;
 };
 
 struct adapter;
 
 /*
  * Per-register state behind one interrupt-delay sysctl.  Filled in by
  * em_add_int_delay_sysctl() and handed to em_sysctl_int_delay() as arg1.
  */
 struct em_int_delay_info {
 	struct adapter *adapter;	/* Back-pointer to the adapter struct */
 	int offset;			/* Register offset to read/write */
 	int value;			/* Current value in usecs */
 };
 
 /*
  * The transmit ring, one per tx queue
  */
 struct tx_ring {
         struct adapter          *adapter;
         struct mtx              tx_mtx;
         char                    mtx_name[16];
         u32                     me;
         u32                     msix;
 	u32			ims;
         int			busy;
 	struct em_dma_alloc	txdma;
 	struct e1000_tx_desc	*tx_base;
         struct task             tx_task;
         struct taskqueue        *tq;
         u32                     next_avail_desc;
         u32                     next_to_clean;
-        struct em_buffer	*tx_buffers;
+        struct em_txbuffer	*tx_buffers;
         volatile u16            tx_avail;
 	u32			tx_tso;		/* last tx was tso */
         u16			last_hw_offload;
 	u8			last_hw_ipcso;
 	u8			last_hw_ipcss;
 	u8			last_hw_tucso;
 	u8			last_hw_tucss;
 #if __FreeBSD_version >= 800000
 	struct buf_ring         *br;
 #endif
 	/* Interrupt resources */
         bus_dma_tag_t           txtag;
 	void                    *tag;
 	struct resource         *res;
         unsigned long		tx_irq;
         unsigned long		no_desc_avail;
 };
 
 /*
  * The Receive ring, one per rx queue
  */
 struct rx_ring {
         struct adapter          *adapter;
         u32                     me;
         u32                     msix;
 	u32			ims;
         struct mtx              rx_mtx;
         char                    mtx_name[16];
         u32                     payload;
         struct task             rx_task;
         struct taskqueue        *tq;
-        struct e1000_rx_desc	*rx_base;
+        union e1000_rx_desc_extended	*rx_base;
         struct em_dma_alloc	rxdma;
         u32			next_to_refresh;
         u32			next_to_check;
-        struct em_buffer	*rx_buffers;
+        struct em_rxbuffer	*rx_buffers;
 	struct mbuf		*fmp;
 	struct mbuf		*lmp;
 
         /* Interrupt resources */
         void                    *tag;
         struct resource         *res;
         bus_dma_tag_t           rxtag;
 	bool			discard;
 
         /* Soft stats */
         unsigned long		rx_irq;
         unsigned long		rx_discarded;
         unsigned long		rx_packets;
         unsigned long		rx_bytes;
 };
 
 
 /*
  * Our adapter structure: the per-device state of the em driver.  The
  * sysctl handlers and DDB commands obtain it via arg1 or
  * device_get_softc().
  */
 struct adapter {
 	if_t 		ifp;
 	struct e1000_hw	hw;
 
 	/* FreeBSD operating-system-specific structures. */
 	struct e1000_osdep osdep;
 	struct device	*dev;
 	struct cdev	*led_dev;
 
 	struct resource *memory;
 	struct resource *flash;
 	struct resource *msix_mem;
 
 	struct resource	*res;
 	void		*tag;
 	u32		linkvec;
 	u32		ivars;
 
 	struct ifmedia	media;
 	struct callout	timer;
 	int		msix;
 	int		if_flags;
 	int		max_frame_size;
 	int		min_frame_size;
 	struct mtx	core_mtx;	/* taken by EM_CORE_LOCK() */
 	int		em_insert_vlan_header;
 	u32		ims;
 	bool		in_detach;
 
 	/* Task for FAST handling */
 	struct task     link_task;
 	struct task     que_task;
 	struct taskqueue *tq;           /* private task queue */
 
 	eventhandler_tag vlan_attach;
 	eventhandler_tag vlan_detach;
 
 	u16	num_vlans;
 	u8	num_queues;	/* number of tx_rings/rx_rings entries walked by em_print_debug_info() */
 
         /*
          * Transmit rings:
          *      Allocated at run time, an array of rings.
          */
         struct tx_ring  *tx_rings;
         int             num_tx_desc;
         u32		txd_cmd;	/* em_sysctl_int_delay() toggles E1000_TXD_CMD_IDE here */
 
         /*
          * Receive rings:
          *      Allocated at run time, an array of rings.
          */
         struct rx_ring  *rx_rings;
         int             num_rx_desc;
         u32             rx_process_limit;
 	u32		rx_mbuf_sz;
 
 	/* Management and WOL features */
 	u32		wol;
 	bool		has_manage;
 	bool		has_amt;
 
 	/* Multicast array memory */
 	u8		*mta;
 
 	/*
 	** Shadow VFTA table, this is needed because
 	** the real vlan filter table gets cleared during
 	** a soft reset and the driver needs to be able
 	** to repopulate it.
 	*/
 	u32		shadow_vfta[EM_VFTA_SIZE];
 
 	/* Info about the interface */
 	u16		link_active;
 	u16		fc;		/* flow control mode last accepted by em_set_flowcntl() */
 	u16		link_speed;
 	u16		link_duplex;
 	u32		smartspeed;
 
 	/* Interrupt-delay sysctl state, one per register (see em_int_delay_info) */
 	struct em_int_delay_info tx_int_delay;
 	struct em_int_delay_info tx_abs_int_delay;
 	struct em_int_delay_info rx_int_delay;
 	struct em_int_delay_info rx_abs_int_delay;
 	struct em_int_delay_info tx_itr;
 
 	/* Misc stats maintained by the driver */
 	unsigned long	dropped_pkts;
 	unsigned long	mbuf_alloc_failed;
 	unsigned long	mbuf_cluster_failed;
 	unsigned long	no_tx_map_avail;
         unsigned long	no_tx_dma_setup;
 	unsigned long	rx_overruns;
 	unsigned long	watchdog_events;
 	unsigned long	link_irq;
 
 	/* Hardware counters; individual members are exported as read-only sysctls */
 	struct e1000_hw_stats stats;
 };
 
 /********************************************************************************
  * vendor_info_array
  *
  * This array contains the list of Subvendor/Subdevice IDs on which the driver
  * should load.
  *
  ********************************************************************************/
 /* One table entry: PCI vendor/device IDs plus subvendor/subdevice IDs. */
 typedef struct _em_vendor_info_t {
 	unsigned int vendor_id;
 	unsigned int device_id;
 	unsigned int subvendor_id;
 	unsigned int subdevice_id;
 	unsigned int index;	/* NOTE(review): meaning not visible here -- confirm against the table's users */
 } em_vendor_info_t;
 
-struct em_buffer {
+struct em_txbuffer {
 	int		next_eop;  /* Index of the desc to watch */
         struct mbuf    *m_head;
         bus_dmamap_t    map;         /* bus_dma map for packet */
 };
 
+struct em_rxbuffer {
+	int		next_eop;  /* Index of the desc to watch */
+        struct mbuf    *m_head;
+        bus_dmamap_t    map;         /* bus_dma map for packet */
+	bus_addr_t	paddr;
+};
 
+
 /*
 ** Find the number of unrefreshed RX descriptors
 */
 static inline u16
 e1000_rx_unrefreshed(struct rx_ring *rxr)
 {
 	struct adapter	*adapter = rxr->adapter;
 
 	if (rxr->next_to_check > rxr->next_to_refresh)
 		return (rxr->next_to_check - rxr->next_to_refresh - 1);
 	else
 		return ((adapter->num_rx_desc + rxr->next_to_check) -
 		    rxr->next_to_refresh - 1); 
 }
 
 #define	EM_CORE_LOCK_INIT(_sc, _name) \
 	mtx_init(&(_sc)->core_mtx, _name, "EM Core Lock", MTX_DEF)
 #define	EM_TX_LOCK_INIT(_sc, _name) \
 	mtx_init(&(_sc)->tx_mtx, _name, "EM TX Lock", MTX_DEF)
 #define	EM_RX_LOCK_INIT(_sc, _name) \
 	mtx_init(&(_sc)->rx_mtx, _name, "EM RX Lock", MTX_DEF)
 #define	EM_CORE_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->core_mtx)
 #define	EM_TX_LOCK_DESTROY(_sc)		mtx_destroy(&(_sc)->tx_mtx)
 #define	EM_RX_LOCK_DESTROY(_sc)		mtx_destroy(&(_sc)->rx_mtx)
 #define	EM_CORE_LOCK(_sc)		mtx_lock(&(_sc)->core_mtx)
 #define	EM_TX_LOCK(_sc)			mtx_lock(&(_sc)->tx_mtx)
 #define	EM_TX_TRYLOCK(_sc)		mtx_trylock(&(_sc)->tx_mtx)
 #define	EM_RX_LOCK(_sc)			mtx_lock(&(_sc)->rx_mtx)
 #define	EM_CORE_UNLOCK(_sc)		mtx_unlock(&(_sc)->core_mtx)
 #define	EM_TX_UNLOCK(_sc)		mtx_unlock(&(_sc)->tx_mtx)
 #define	EM_RX_UNLOCK(_sc)		mtx_unlock(&(_sc)->rx_mtx)
 #define	EM_CORE_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->core_mtx, MA_OWNED)
 #define	EM_TX_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->tx_mtx, MA_OWNED)
 #define	EM_RX_LOCK_ASSERT(_sc)		mtx_assert(&(_sc)->rx_mtx, MA_OWNED)
 
+#define EM_RSSRK_SIZE	4
+#define EM_RSSRK_VAL(key, i)		(key[(i) * EM_RSSRK_SIZE] | \
+					 key[(i) * EM_RSSRK_SIZE + 1] << 8 | \
+					 key[(i) * EM_RSSRK_SIZE + 2] << 16 | \
+					 key[(i) * EM_RSSRK_SIZE + 3] << 24)
 #endif /* _EM_H_DEFINED_ */
Index: projects/release-pkg/sys/dev/ixgbe/if_ix.c
===================================================================
--- projects/release-pkg/sys/dev/ixgbe/if_ix.c	(revision 293335)
+++ projects/release-pkg/sys/dev/ixgbe/if_ix.c	(revision 293336)
@@ -1,5902 +1,5942 @@
 /******************************************************************************
 
   Copyright (c) 2001-2015, Intel Corporation 
   All rights reserved.
   
   Redistribution and use in source and binary forms, with or without 
   modification, are permitted provided that the following conditions are met:
   
    1. Redistributions of source code must retain the above copyright notice, 
       this list of conditions and the following disclaimer.
   
    2. Redistributions in binary form must reproduce the above copyright 
       notice, this list of conditions and the following disclaimer in the 
       documentation and/or other materials provided with the distribution.
   
    3. Neither the name of the Intel Corporation nor the names of its 
       contributors may be used to endorse or promote products derived from 
       this software without specific prior written permission.
   
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.
 
 ******************************************************************************/
 /*$FreeBSD$*/
 
 
 #ifndef IXGBE_STANDALONE_BUILD
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_rss.h"
 #endif
 
 #include "ixgbe.h"
 
 #ifdef	RSS
 #include <net/rss_config.h>
 #include <netinet/in_rss.h>
 #endif
 
 /*********************************************************************
  *  Driver version
  *********************************************************************/
 char ixgbe_driver_version[] = "3.1.13-k";
 
 
 /*********************************************************************
  *  PCI Device ID Table
  *
  *  Used by probe to select devices to load on
  *  Last field stores an index into ixgbe_strings
  *  Last entry must be all 0s
  *
  *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
  *********************************************************************/
 
 static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
 {
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, 0, 0, 0},
 	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, 0, 0, 0},
 	/* required last entry */
 	{0, 0, 0, 0, 0}
 };
 
 /*********************************************************************
  *  Table of branding strings
  *********************************************************************/
 
 static char    *ixgbe_strings[] = {
 	"Intel(R) PRO/10GbE PCI-Express Network Driver"
 };
 
 /*********************************************************************
  *  Function prototypes
  *********************************************************************/
 static int      ixgbe_probe(device_t);
 static int      ixgbe_attach(device_t);
 static int      ixgbe_detach(device_t);
 static int      ixgbe_shutdown(device_t);
 static int	ixgbe_suspend(device_t);
 static int	ixgbe_resume(device_t);
 static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
 static void	ixgbe_init(void *);
 static void	ixgbe_init_locked(struct adapter *);
 static void     ixgbe_stop(void *);
 #if __FreeBSD_version >= 1100036
 static uint64_t	ixgbe_get_counter(struct ifnet *, ift_counter);
 #endif
 static void	ixgbe_add_media_types(struct adapter *);
 static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
 static int      ixgbe_media_change(struct ifnet *);
 static void     ixgbe_identify_hardware(struct adapter *);
 static int      ixgbe_allocate_pci_resources(struct adapter *);
 static void	ixgbe_get_slot_info(struct adapter *);
 static int      ixgbe_allocate_msix(struct adapter *);
 static int      ixgbe_allocate_legacy(struct adapter *);
 static int	ixgbe_setup_msix(struct adapter *);
 static void	ixgbe_free_pci_resources(struct adapter *);
 static void	ixgbe_local_timer(void *);
 static int	ixgbe_setup_interface(device_t, struct adapter *);
 static void	ixgbe_config_gpie(struct adapter *);
 static void	ixgbe_config_dmac(struct adapter *);
 static void	ixgbe_config_delay_values(struct adapter *);
 static void	ixgbe_config_link(struct adapter *);
 static void	ixgbe_check_wol_support(struct adapter *);
 static int	ixgbe_setup_low_power_mode(struct adapter *);
 static void	ixgbe_rearm_queues(struct adapter *, u64);
 
 static void     ixgbe_initialize_transmit_units(struct adapter *);
 static void     ixgbe_initialize_receive_units(struct adapter *);
 static void	ixgbe_enable_rx_drop(struct adapter *);
 static void	ixgbe_disable_rx_drop(struct adapter *);
 static void	ixgbe_initialize_rss_mapping(struct adapter *);
 
 static void     ixgbe_enable_intr(struct adapter *);
 static void     ixgbe_disable_intr(struct adapter *);
 static void     ixgbe_update_stats_counters(struct adapter *);
 static void     ixgbe_set_promisc(struct adapter *);
 static void     ixgbe_set_multi(struct adapter *);
 static void     ixgbe_update_link_status(struct adapter *);
 static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
 static void	ixgbe_configure_ivars(struct adapter *);
 static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
 
 static void	ixgbe_setup_vlan_hw_support(struct adapter *);
 static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
 static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
 
 static void	ixgbe_add_device_sysctls(struct adapter *);
 static void     ixgbe_add_hw_stats(struct adapter *);
 
 /* Sysctl handlers */
 static void	ixgbe_set_sysctl_value(struct adapter *, const char *,
 		     const char *, int *, int);
 static int	ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS);
 #ifdef IXGBE_DEBUG
 static int	ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS);
 #endif
 static int	ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS);
 static int	ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS);
 
 /* Support for pluggable optic modules */
 static bool	ixgbe_sfp_probe(struct adapter *);
 static void	ixgbe_setup_optics(struct adapter *);
 
 /* Legacy (single vector) interrupt handler */
 static void	ixgbe_legacy_irq(void *);
 
 /* The MSI/X Interrupt handlers */
 static void	ixgbe_msix_que(void *);
 static void	ixgbe_msix_link(void *);
 
 /* Deferred interrupt tasklets */
 static void	ixgbe_handle_que(void *, int);
 static void	ixgbe_handle_link(void *, int);
 static void	ixgbe_handle_msf(void *, int);
 static void	ixgbe_handle_mod(void *, int);
 static void	ixgbe_handle_phy(void *, int);
 
 #ifdef IXGBE_FDIR
 static void	ixgbe_reinit_fdir(void *, int);
 #endif
 
 #ifdef PCI_IOV
 static void	ixgbe_ping_all_vfs(struct adapter *);
 static void	ixgbe_handle_mbx(void *, int);
 static int	ixgbe_init_iov(device_t, u16, const nvlist_t *);
 static void	ixgbe_uninit_iov(device_t);
 static int	ixgbe_add_vf(device_t, u16, const nvlist_t *);
 static void	ixgbe_initialize_iov(struct adapter *);
 static void	ixgbe_recalculate_max_frame(struct adapter *);
 static void	ixgbe_init_vf(struct adapter *, struct ixgbe_vf *);
 #endif /* PCI_IOV */
 
 
 /*********************************************************************
  *  FreeBSD Device Interface Entry Points
  *********************************************************************/
 
 static device_method_t ix_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe, ixgbe_probe),
 	DEVMETHOD(device_attach, ixgbe_attach),
 	DEVMETHOD(device_detach, ixgbe_detach),
 	DEVMETHOD(device_shutdown, ixgbe_shutdown),
 	DEVMETHOD(device_suspend, ixgbe_suspend),
 	DEVMETHOD(device_resume, ixgbe_resume),
 #ifdef PCI_IOV
 	DEVMETHOD(pci_iov_init, ixgbe_init_iov),
 	DEVMETHOD(pci_iov_uninit, ixgbe_uninit_iov),
 	DEVMETHOD(pci_iov_add_vf, ixgbe_add_vf),
 #endif /* PCI_IOV */
 	DEVMETHOD_END
 };
 
 static driver_t ix_driver = {
 	"ix", ix_methods, sizeof(struct adapter),
 };
 
 devclass_t ix_devclass;
 DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0);
 
 MODULE_DEPEND(ix, pci, 1, 1, 1);
 MODULE_DEPEND(ix, ether, 1, 1, 1);
 #ifdef DEV_NETMAP
 MODULE_DEPEND(ix, netmap, 1, 1, 1);
 #endif /* DEV_NETMAP */
 
 /*
 ** TUNEABLE PARAMETERS:
 */
 
 static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
 		   "IXGBE driver parameters");
 
 /*
 ** AIM: Adaptive Interrupt Moderation
 ** which means that the interrupt rate
 ** is varied over time based on the
 ** traffic for that interrupt vector
 */
 static int ixgbe_enable_aim = TRUE;
 SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &ixgbe_enable_aim, 0,
     "Enable adaptive interrupt moderation");
 
 static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
 SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
     &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");
 
 /*
  * How many packets rxeof tries to clean at a time.
  * (The description is split over two adjacent string literals, which
  * the compiler concatenates verbatim, so the separating space must be
  * part of one of them.)
  */
 static int ixgbe_rx_process_limit = 256;
 SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
     &ixgbe_rx_process_limit, 0,
     "Maximum number of received packets to process at a time, "
     "-1 means unlimited");
 
 /* How many packets txeof tries to clean at a time */
 static int ixgbe_tx_process_limit = 256;
 SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
     &ixgbe_tx_process_limit, 0,
     "Maximum number of sent packets to process at a time, "
     "-1 means unlimited");
 
 /*
 ** Smart speed setting, defaults to on.
 ** This only works as a compile-time option
 ** right now because it is used during attach;
 ** set this to 'ixgbe_smart_speed_off' to
 ** disable.
 */
 static int ixgbe_smart_speed = ixgbe_smart_speed_on;
 
 /*
  * MSIX should be the default for best performance,
  * but this allows it to be forced off for testing.
  */
 static int ixgbe_enable_msix = 1;
 SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
     "Enable MSI-X interrupts");
 
 /*
  * Number of Queues, can be set to 0,
  * it then autoconfigures based on the
  * number of cpus with a max of 8. This
  * can be overridden manually here.
  */
 static int ixgbe_num_queues = 0;
 SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
     "Number of queues to configure, 0 indicates autoconfigure");
 
 /*
 ** Number of TX descriptors per ring,
 ** setting higher than RX as this seems
 ** the better performing choice.
 */
 static int ixgbe_txd = PERFORM_TXD;
 SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
     "Number of transmit descriptors per queue");
 
 /* Number of RX descriptors per ring */
 static int ixgbe_rxd = PERFORM_RXD;
 SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
     "Number of receive descriptors per queue");
 
 /*
 ** Defining this on will allow the use
 ** of unsupported SFP+ modules, note that
 ** doing so you are on your own :)
 */
 static int allow_unsupported_sfp = FALSE;
 TUNABLE_INT("hw.ix.unsupported_sfp", &allow_unsupported_sfp);
 
 /* Keep running tab on them for sanity check */
 static int ixgbe_total_ports;
 
 #ifdef IXGBE_FDIR
 /* 
 ** Flow Director actually 'steals'
 ** part of the packet buffer as its
 ** filter pool, this variable controls
 ** how much it uses:
 **  0 = 64K, 1 = 128K, 2 = 256K
 */
 static int fdir_pballoc = 1;
 #endif
 
 #ifdef DEV_NETMAP
 /*
  * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
  * be a reference on how to implement netmap support in a driver.
  * Additional comments are in ixgbe_netmap.h .
  *
  * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
  * that extend the standard driver.
  */
 #include <dev/netmap/ixgbe_netmap.h>
 #endif /* DEV_NETMAP */
 
 static MALLOC_DEFINE(M_IXGBE, "ix", "ix driver allocations");
 
 /*********************************************************************
  *  Device identification routine
  *
  *  ixgbe_probe determines if the driver should be loaded on
  *  adapter based on PCI vendor/device id of the adapter.  The IDs
  *  read from the device are compared against each entry of
  *  ixgbe_vendor_info_array; a subvendor or subdevice ID of 0 in a
  *  table entry matches any value.  On a match the device description
  *  is set and ixgbe_total_ports is incremented.
  *
  *  return BUS_PROBE_DEFAULT on success, ENXIO when nothing matches
  *********************************************************************/
 
 static int
 ixgbe_probe(device_t dev)
 {
 	ixgbe_vendor_info_t *ent;
 
 	u16	pci_vendor_id = 0;
 	u16	pci_device_id = 0;
 	u16	pci_subvendor_id = 0;
 	u16	pci_subdevice_id = 0;
 	char	adapter_name[256];
 
 	INIT_DEBUGOUT("ixgbe_probe: begin");
 
 	/* Cheap early exit: only Intel devices are ever in the table. */
 	pci_vendor_id = pci_get_vendor(dev);
 	if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
 		return (ENXIO);
 
 	pci_device_id = pci_get_device(dev);
 	pci_subvendor_id = pci_get_subvendor(dev);
 	pci_subdevice_id = pci_get_subdevice(dev);
 
 	/* The table is terminated by an all-zero entry. */
 	ent = ixgbe_vendor_info_array;
 	while (ent->vendor_id != 0) {
 		if ((pci_vendor_id == ent->vendor_id) &&
 		    (pci_device_id == ent->device_id) &&
 
 		    ((pci_subvendor_id == ent->subvendor_id) ||
 		     (ent->subvendor_id == 0)) &&
 
 		    ((pci_subdevice_id == ent->subdevice_id) ||
 		     (ent->subdevice_id == 0))) {
 			/*
 			 * Bounded formatting: never write past the end of
 			 * adapter_name, whatever the string lengths are.
 			 */
 			snprintf(adapter_name, sizeof(adapter_name),
 			    "%s, Version - %s",
 			    ixgbe_strings[ent->index],
 			    ixgbe_driver_version);
 			device_set_desc_copy(dev, adapter_name);
 			++ixgbe_total_ports;
 			return (BUS_PROBE_DEFAULT);
 		}
 		ent++;
 	}
 	return (ENXIO);
 }
 
 /*********************************************************************
  *  Device initialization routine
  *
  *  The attach entry point is called when the driver is being loaded.
  *  This routine identifies the type of hardware, allocates all resources
  *  and initializes the hardware.
  *
  *  Failures unwind through the err_late / err_out labels, which free
  *  the TX/RX structures (err_late only), the ifnet if one was
  *  allocated, the PCI resources, the multicast array and finally the
  *  core lock, and then return a non-zero error.
  *
  *  return 0 on success, positive on failure
  *********************************************************************/
 
 static int
 ixgbe_attach(device_t dev)
 {
 	struct adapter *adapter;
 	struct ixgbe_hw *hw;
 	int             error = 0;
 	u16		csum;
 	u32		ctrl_ext;
 
 	INIT_DEBUGOUT("ixgbe_attach: begin");
 
 	/* Allocate, clear, and link in our adapter structure */
 	adapter = device_get_softc(dev);
 	adapter->dev = dev;
 	hw = &adapter->hw;
 
 #ifdef DEV_NETMAP
 	adapter->init_locked = ixgbe_init_locked;
 	adapter->stop_locked = ixgbe_stop;
 #endif
 
 	/* Core Lock Init*/
 	IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
 
 	/* Set up the timer callout */
 	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
 
 	/* Determine hardware revision */
 	ixgbe_identify_hardware(adapter);
 
 	/* Do base PCI setup - map BAR0 */
 	if (ixgbe_allocate_pci_resources(adapter)) {
 		device_printf(dev, "Allocation of PCI resources failed\n");
 		error = ENXIO;
 		goto err_out;
 	}
 
 	/* Sysctls for limiting the amount of work done in the taskqueues */
 	ixgbe_set_sysctl_value(adapter, "rx_processing_limit",
 	    "max number of rx packets to process",
 	    &adapter->rx_process_limit, ixgbe_rx_process_limit);
 
 	ixgbe_set_sysctl_value(adapter, "tx_processing_limit",
 	    "max number of tx packets to process",
 	    &adapter->tx_process_limit, ixgbe_tx_process_limit);
 
 	/* Do descriptor calc and sanity checks */
 	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
 	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
 		device_printf(dev, "TXD config issue, using default!\n");
 		adapter->num_tx_desc = DEFAULT_TXD;
 	} else
 		adapter->num_tx_desc = ixgbe_txd;
 
 	/*
 	** With many RX rings it is easy to exceed the
 	** system mbuf allocation. Tuning nmbclusters
 	** can alleviate this.
 	*/
 	if (nmbclusters > 0) {
 		int s;
 		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
 		if (s > nmbclusters) {
 			device_printf(dev, "RX Descriptors exceed "
 			    "system mbuf max, using default instead!\n");
 			ixgbe_rxd = DEFAULT_RXD;
 		}
 	}
 
 	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
 	    ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) {
 		device_printf(dev, "RXD config issue, using default!\n");
 		adapter->num_rx_desc = DEFAULT_RXD;
 	} else
 		adapter->num_rx_desc = ixgbe_rxd;
 
 	/* Allocate our TX/RX Queues */
 	if (ixgbe_allocate_queues(adapter)) {
 		error = ENOMEM;
 		goto err_out;
 	}
 
 	/* Allocate multicast array memory. */
 	adapter->mta = malloc(sizeof(*adapter->mta) *
 	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
 	if (adapter->mta == NULL) {
 		device_printf(dev, "Can not allocate multicast setup array\n");
 		error = ENOMEM;
 		goto err_late;
 	}
 
 	/* Initialize the shared code */
 	hw->allow_unsupported_sfp = allow_unsupported_sfp;
 	error = ixgbe_init_shared_code(hw);
 	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
 		/*
 		** No optics in this port, set up
 		** so the timer routine will probe
 		** for later insertion.
 		*/
 		adapter->sfp_probe = TRUE;
 		error = 0;
 	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
 		device_printf(dev, "Unsupported SFP+ module detected!\n");
 		error = EIO;
 		goto err_late;
 	} else if (error) {
 		device_printf(dev, "Unable to initialize the shared code\n");
 		error = EIO;
 		goto err_late;
 	}
 
 	/* Make sure we have a good EEPROM before we read from it */
 	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
 		device_printf(dev, "The EEPROM Checksum Is Not Valid\n");
 		error = EIO;
 		goto err_late;
 	}
 
 	error = ixgbe_init_hw(hw);
 	switch (error) {
 	case IXGBE_ERR_EEPROM_VERSION:
 		device_printf(dev, "This device is a pre-production adapter/"
 		    "LOM.  Please be aware there may be issues associated "
 		    "with your hardware.\nIf you are experiencing problems "
 		    "please contact your Intel or hardware representative "
 		    "who provided you with this hardware.\n");
 		break;
 	case IXGBE_ERR_SFP_NOT_SUPPORTED:
 		device_printf(dev, "Unsupported SFP+ Module\n");
 		error = EIO;
 		goto err_late;
 	case IXGBE_ERR_SFP_NOT_PRESENT:
 		device_printf(dev, "No SFP+ Module found\n");
 		/* falls thru */
 	default:
 		break;
 	}
 
 	/* 'error' is overwritten here; non-fatal init_hw codes are dropped. */
 	if ((adapter->msix > 1) && (ixgbe_enable_msix))
 		error = ixgbe_allocate_msix(adapter);
 	else
 		error = ixgbe_allocate_legacy(adapter);
 	if (error)
 		goto err_late;
 
 	/* Setup OS specific network interface */
 	if (ixgbe_setup_interface(dev, adapter) != 0) {
 		/*
 		 * 'error' is still 0 from the interrupt allocation above.
 		 * Set it explicitly so that attach does not report success
 		 * after everything has been torn down again.
 		 */
 		error = ENXIO;
 		goto err_late;
 	}
 
 	/* Initialize statistics */
 	ixgbe_update_stats_counters(adapter);
 
 	/* Register for VLAN events */
 	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 	    ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
 	    ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
 
 	/* Check PCIE slot type/speed/width */
 	ixgbe_get_slot_info(adapter);
 
 	/* Set an initial default flow control & dmac value */
 	adapter->fc = ixgbe_fc_full;
 	adapter->dmac = 0;
 	adapter->eee_enabled = 0;
 
 #ifdef PCI_IOV
 	if ((hw->mac.type != ixgbe_mac_82598EB) && (adapter->msix > 1)) {
 		nvlist_t *pf_schema, *vf_schema;
 
 		hw->mbx.ops.init_params(hw);
 		pf_schema = pci_iov_schema_alloc_node();
 		vf_schema = pci_iov_schema_alloc_node();
 		pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL);
 		pci_iov_schema_add_bool(vf_schema, "mac-anti-spoof",
 		    IOV_SCHEMA_HASDEFAULT, TRUE);
 		pci_iov_schema_add_bool(vf_schema, "allow-set-mac",
 		    IOV_SCHEMA_HASDEFAULT, FALSE);
 		pci_iov_schema_add_bool(vf_schema, "allow-promisc",
 		    IOV_SCHEMA_HASDEFAULT, FALSE);
 		/* An SR-IOV setup failure is reported but is not fatal. */
 		error = pci_iov_attach(dev, pf_schema, vf_schema);
 		if (error != 0) {
 			device_printf(dev,
 			    "Error %d setting up SR-IOV\n", error);
 		}
 	}
 #endif /* PCI_IOV */
 
 	/* Check for certain supported features */
 	ixgbe_check_wol_support(adapter);
 
 	/* Add sysctls */
 	ixgbe_add_device_sysctls(adapter);
 	ixgbe_add_hw_stats(adapter);
 
 	/* let hardware know driver is loaded */
 	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
 	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
 
 #ifdef DEV_NETMAP
 	ixgbe_netmap_attach(adapter);
 #endif /* DEV_NETMAP */
 	INIT_DEBUGOUT("ixgbe_attach: end");
 	return (0);
 
 err_late:
 	ixgbe_free_transmit_structures(adapter);
 	ixgbe_free_receive_structures(adapter);
 err_out:
 	if (adapter->ifp != NULL)
 		if_free(adapter->ifp);
 	ixgbe_free_pci_resources(adapter);
 	free(adapter->mta, M_DEVBUF);
 	/* The core lock was initialized first, so it is destroyed last. */
 	IXGBE_CORE_LOCK_DESTROY(adapter);
 	return (error);
 }
 
 /*********************************************************************
  *  Device removal routine
  *
  *  The detach entry point is called when the driver is being removed.
  *  This routine stops the adapter and deallocates all the resources
  *  that were allocated for driver operation.
  *
  *  return 0 on success, positive on failure
  *********************************************************************/
 
 /*
  * Tear down one adapter instance.  Returns EBUSY without touching
  * anything if a VLAN trunk is still attached to the ifnet (or, with
  * PCI_IOV, if pci_iov_detach() fails); otherwise releases everything
  * in the order below and returns 0.
  */
 static int
 ixgbe_detach(device_t dev)
 {
 	struct adapter *adapter = device_get_softc(dev);
 	struct ix_queue *que = adapter->queues;
 	struct tx_ring *txr = adapter->tx_rings;
 	u32	ctrl_ext;
 
 	INIT_DEBUGOUT("ixgbe_detach: begin");
 
 	/* Make sure VLANS are not using driver */
 	if (adapter->ifp->if_vlantrunk != NULL) {
 		device_printf(dev,"Vlan in use, detach first\n");
 		return (EBUSY);
 	}
 
 #ifdef PCI_IOV
 	if (pci_iov_detach(dev) != 0) {
 		device_printf(dev, "SR-IOV in use; detach first.\n");
 		return (EBUSY);
 	}
 #endif /* PCI_IOV */
 
 	ether_ifdetach(adapter->ifp);
 	/* Stop the adapter */
 	IXGBE_CORE_LOCK(adapter);
 	ixgbe_setup_low_power_mode(adapter);
 	IXGBE_CORE_UNLOCK(adapter);
 
 	/*
 	 * que and txr are advanced together, one step per queue index.
 	 * Pending tasks are drained before each taskqueue is freed.
 	 */
 	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
 		if (que->tq) {
 #ifndef IXGBE_LEGACY_TX
 			taskqueue_drain(que->tq, &txr->txq_task);
 #endif
 			taskqueue_drain(que->tq, &que->que_task);
 			taskqueue_free(que->tq);
 		}
 	}
 
 	/* Drain the Link queue */
 	if (adapter->tq) {
 		taskqueue_drain(adapter->tq, &adapter->link_task);
 		taskqueue_drain(adapter->tq, &adapter->mod_task);
 		taskqueue_drain(adapter->tq, &adapter->msf_task);
 #ifdef PCI_IOV
 		taskqueue_drain(adapter->tq, &adapter->mbx_task);
 #endif
 		taskqueue_drain(adapter->tq, &adapter->phy_task);
 #ifdef IXGBE_FDIR
 		taskqueue_drain(adapter->tq, &adapter->fdir_task);
 #endif
 		taskqueue_free(adapter->tq);
 	}
 
 	/* let hardware know driver is unloading */
 	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
 	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
 
 	/* Unregister VLAN events */
 	if (adapter->vlan_attach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
 	if (adapter->vlan_detach != NULL)
 		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
 
 	/* Wait for the timer callout to finish before freeing resources */
 	callout_drain(&adapter->timer);
 #ifdef DEV_NETMAP
 	netmap_detach(adapter->ifp);
 #endif /* DEV_NETMAP */
 	ixgbe_free_pci_resources(adapter);
 	bus_generic_detach(dev);
 	if_free(adapter->ifp);
 
 	ixgbe_free_transmit_structures(adapter);
 	ixgbe_free_receive_structures(adapter);
 	free(adapter->mta, M_DEVBUF);
 
 	/* Destroyed last: the lock is used by the steps above */
 	IXGBE_CORE_LOCK_DESTROY(adapter);
 	return (0);
 }
 
 /*********************************************************************
  *
  *  Shutdown entry point
  *
  **********************************************************************/
 
 /*
  * Runs ixgbe_setup_low_power_mode() on the adapter while holding the
  * core lock and returns that routine's result.
  */
 static int
 ixgbe_shutdown(device_t dev)
 {
 	struct adapter *sc = device_get_softc(dev);
 	int rc;
 
 	INIT_DEBUGOUT("ixgbe_shutdown: begin");
 
 	IXGBE_CORE_LOCK(sc);
 	rc = ixgbe_setup_low_power_mode(sc);
 	IXGBE_CORE_UNLOCK(sc);
 
 	return (rc);
 }
 
 /**
  * Methods for going from:
  * D0 -> D3: ixgbe_suspend
  * D3 -> D0: ixgbe_resume
  */
 /*
  * Suspend handler: runs ixgbe_setup_low_power_mode() under the core
  * lock and hands its return value back to the caller.
  */
 static int
 ixgbe_suspend(device_t dev)
 {
 	struct adapter *sc = device_get_softc(dev);
 	int rc;
 
 	INIT_DEBUGOUT("ixgbe_suspend: begin");
 
 	IXGBE_CORE_LOCK(sc);
 	rc = ixgbe_setup_low_power_mode(sc);
 	IXGBE_CORE_UNLOCK(sc);
 
 	return (rc);
 }
 
 /*
  * Resume handler.  With the core lock held: reads the WUS register,
  * logs it when non-zero, clears it by writing all ones, zeroes WUFC,
  * and re-runs ixgbe_init_locked() if the interface is flagged IFF_UP.
  * Always returns 0.
  */
 static int
 ixgbe_resume(device_t dev)
 {
 	struct adapter *adapter = device_get_softc(dev);
 	struct ifnet *ifp = adapter->ifp;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 wus;
 
 	INIT_DEBUGOUT("ixgbe_resume: begin");
 
 	IXGBE_CORE_LOCK(adapter);
 
 	/*
 	 * Read & clear WUS register.  The value is read once and that
 	 * same value is both tested and printed, so the log line always
 	 * matches the condition that triggered it.
 	 */
 	wus = IXGBE_READ_REG(hw, IXGBE_WUS);
 	if (wus)
 		device_printf(dev, "Woken up by (WUS): %#010x\n", wus);
 	IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
 	/* And clear WUFC until next low-power transition */
 	IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0);
 
 	/*
 	 * Required after D3->D0 transition;
 	 * will re-advertise all previous advertised speeds
 	 */
 	if (ifp->if_flags & IFF_UP)
 		ixgbe_init_locked(adapter);
 
 	IXGBE_CORE_UNLOCK(adapter);
 
 	return (0);
 }
 
 
 /*********************************************************************
  *  Ioctl entry point
  *
  *  ixgbe_ioctl is called when the user wants to configure the
  *  interface.  Commands handled here: SIOCSIFADDR, SIOCSIFMTU,
  *  SIOCSIFFLAGS, SIOCADDMULTI/SIOCDELMULTI, SIOCSIFMEDIA/SIOCGIFMEDIA,
  *  SIOCSIFCAP and (on new enough kernels) SIOCGI2C.  Anything else is
  *  passed to ether_ioctl().
  *
  *  return 0 on success, positive on failure
  **********************************************************************/
 
 static int
 ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	struct ifreq	*ifr = (struct ifreq *) data;
 #if defined(INET) || defined(INET6)
 	struct ifaddr *ifa = (struct ifaddr *)data;
 #endif
 	int             error = 0;
 	bool		avoid_reset = FALSE;
 
 	switch (command) {
 
 	case SIOCSIFADDR:
 #ifdef INET
 		if (ifa->ifa_addr->sa_family == AF_INET)
 			avoid_reset = TRUE;
 #endif
 #ifdef INET6
 		if (ifa->ifa_addr->sa_family == AF_INET6)
 			avoid_reset = TRUE;
 #endif
 		/*
 		** Calling init results in link renegotiation,
 		** so we avoid doing it when possible.
 		*/
 		if (avoid_reset) {
 			ifp->if_flags |= IFF_UP;
 			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
 				ixgbe_init(adapter);
 #ifdef INET
 			if (!(ifp->if_flags & IFF_NOARP))
 				arp_ifinit(ifp, ifa);
 #endif
 		} else
 			error = ether_ioctl(ifp, command, data);
 		break;
 	case SIOCSIFMTU:
 		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
 		if (ifr->ifr_mtu > IXGBE_MAX_MTU) {
 			error = EINVAL;
 		} else {
 			IXGBE_CORE_LOCK(adapter);
 			ifp->if_mtu = ifr->ifr_mtu;
 			adapter->max_frame_size =
 				ifp->if_mtu + IXGBE_MTU_HDR;
 			ixgbe_init_locked(adapter);
 #ifdef PCI_IOV
 			ixgbe_recalculate_max_frame(adapter);
 #endif
 			IXGBE_CORE_UNLOCK(adapter);
 		}
 		break;
 	case SIOCSIFFLAGS:
 		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
 		IXGBE_CORE_LOCK(adapter);
 		if (ifp->if_flags & IFF_UP) {
 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
 				/* Only react if PROMISC/ALLMULTI changed. */
 				if ((ifp->if_flags ^ adapter->if_flags) &
 				    (IFF_PROMISC | IFF_ALLMULTI)) {
 					ixgbe_set_promisc(adapter);
 				}
 			} else
 				ixgbe_init_locked(adapter);
 		} else
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 				ixgbe_stop(adapter);
 		/* Remember the flags for the next change comparison. */
 		adapter->if_flags = ifp->if_flags;
 		IXGBE_CORE_UNLOCK(adapter);
 		break;
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 			IXGBE_CORE_LOCK(adapter);
 			ixgbe_disable_intr(adapter);
 			ixgbe_set_multi(adapter);
 			ixgbe_enable_intr(adapter);
 			IXGBE_CORE_UNLOCK(adapter);
 		}
 		break;
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
 		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
 		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
 		break;
 	case SIOCSIFCAP:
 	{
 		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
 
 		/* Bits set in mask are the capabilities being toggled. */
 		int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 		if (!mask)
 			break;
 
 		/* HW cannot turn these on/off separately */
 		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
 			ifp->if_capenable ^= IFCAP_RXCSUM;
 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
 		}
 		if (mask & IFCAP_TXCSUM)
 			ifp->if_capenable ^= IFCAP_TXCSUM;
 		if (mask & IFCAP_TXCSUM_IPV6)
 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
 		if (mask & IFCAP_TSO4)
 			ifp->if_capenable ^= IFCAP_TSO4;
 		if (mask & IFCAP_TSO6)
 			ifp->if_capenable ^= IFCAP_TSO6;
 		if (mask & IFCAP_LRO)
 			ifp->if_capenable ^= IFCAP_LRO;
 		if (mask & IFCAP_VLAN_HWTAGGING)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
 		if (mask & IFCAP_VLAN_HWFILTER)
 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
 		if (mask & IFCAP_VLAN_HWTSO)
 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
 
 		/* Re-initialize so the new capability set takes effect. */
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
 			IXGBE_CORE_LOCK(adapter);
 			ixgbe_init_locked(adapter);
 			IXGBE_CORE_UNLOCK(adapter);
 		}
 		VLAN_CAPABILITIES(ifp);
 		break;
 	}
 #if __FreeBSD_version >= 1100036
 	case SIOCGI2C:
 	{
 		struct ixgbe_hw *hw = &adapter->hw;
 		struct ifi2creq i2c;
 		int i;
 		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
 		if (error != 0)
 			break;
 		/* Only device addresses 0xA0 and 0xA2 are accepted. */
 		if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
 			error = EINVAL;
 			break;
 		}
 		/* Never read more bytes than the reply buffer can hold. */
 		if (i2c.len > sizeof(i2c.data)) {
 			error = EINVAL;
 			break;
 		}
 		/*
 		 * The byte reader is reached through a function pointer in
 		 * the PHY ops table; refuse the request instead of calling
 		 * through a NULL pointer if the op is not set.
 		 */
 		if (hw->phy.ops.read_i2c_byte == NULL) {
 			error = ENXIO;
 			break;
 		}
 
 		for (i = 0; i < i2c.len; i++)
 			hw->phy.ops.read_i2c_byte(hw, i2c.offset + i,
 			    i2c.dev_addr, &i2c.data[i]);
 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
 		break;
 	}
 #endif
 	default:
 		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
 		error = ether_ioctl(ifp, command, data);
 		break;
 	}
 
 	return (error);
 }
 
 /*
  * Set the various hardware offload abilities.
  *
  * This takes the ifnet's if_capenable flags (e.g. set by the user using
  * ifconfig) and indicates to the OS via the ifnet's if_hwassist field what
  * mbuf offload flags the driver will understand.
  */
 static void
 ixgbe_set_if_hwassist(struct adapter *adapter)
 {
 	struct ifnet *ifnet = adapter->ifp;
 
 	/* Rebuild the assist mask from scratch on every call. */
 	ifnet->if_hwassist = 0;
 
 #if __FreeBSD_version >= 1000000
 	/* Segmentation offload, per address family. */
 	if ((ifnet->if_capenable & IFCAP_TSO4) != 0)
 		ifnet->if_hwassist |= CSUM_IP_TSO;
 	if ((ifnet->if_capenable & IFCAP_TSO6) != 0)
 		ifnet->if_hwassist |= CSUM_IP6_TSO;
 
 	/* Transmit checksum offload, per address family. */
 	if ((ifnet->if_capenable & IFCAP_TXCSUM) != 0) {
 		ifnet->if_hwassist |= CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP |
 		    CSUM_IP_SCTP;
 	}
 	if ((ifnet->if_capenable & IFCAP_TXCSUM_IPV6) != 0) {
 		ifnet->if_hwassist |= CSUM_IP6_UDP | CSUM_IP6_TCP |
 		    CSUM_IP6_SCTP;
 	}
 #else
 	if ((ifnet->if_capenable & IFCAP_TSO) != 0)
 		ifnet->if_hwassist |= CSUM_TSO;
 
 	if ((ifnet->if_capenable & IFCAP_TXCSUM) != 0) {
 		ifnet->if_hwassist |= CSUM_TCP | CSUM_UDP;
 		/* SCTP checksum is advertised on everything but 82598EB. */
 		if (adapter->hw.mac.type != ixgbe_mac_82598EB)
 			ifnet->if_hwassist |= CSUM_SCTP;
 	}
 #endif
 }
 
 /*********************************************************************
  *  Init entry point
  *
  *  This routine is used in two ways. It is used by the stack as
  *  init entry point in network interface structure. It is also used
  *  by the driver as a hw/sw initialization routine to get to a
  *  consistent state.
  *
  *  Returns nothing; failures are reported with device_printf() and
  *  the routine returns early.
  **********************************************************************/
 #define IXGBE_MHADD_MFS_SHIFT 16
 
 /*
  * Bring the adapter to a running state.  The caller must hold the core
  * mutex (asserted below).
  *
  * The hardware is stopped, the station address and offload flags are
  * refreshed from the ifnet, the TX/RX software rings are rebuilt, every
  * queue is enabled, interrupt routing is programmed and finally
  * IFF_DRV_RUNNING is set.  Nothing is returned: if the TX or RX ring setup
  * fails a message is printed and ixgbe_stop() is called; if an unsupported
  * SFP+ module is detected a message is printed and the routine returns
  * without marking the interface running.
  */
 static void
 ixgbe_init_locked(struct adapter *adapter)
 {
 	struct ifnet   *ifp = adapter->ifp;
 	device_t 	dev = adapter->dev;
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct tx_ring  *txr;
 	struct rx_ring  *rxr;
 	u32		txdctl, mhadd;
 	u32		rxdctl, rxctrl;
 	int err = 0;
 #ifdef PCI_IOV
 	enum ixgbe_iov_mode mode;
 #endif
 
 	mtx_assert(&adapter->core_mtx, MA_OWNED);
 	INIT_DEBUGOUT("ixgbe_init_locked: begin");
 
 	hw->adapter_stopped = FALSE;
 	ixgbe_stop_adapter(hw);
 	callout_stop(&adapter->timer);
 
 #ifdef PCI_IOV
 	mode = ixgbe_get_iov_mode(adapter);
 	adapter->pool = ixgbe_max_vfs(mode);
 	/* Queue indices may change with IOV mode */
 	for (int i = 0; i < adapter->num_queues; i++) {
 		adapter->rx_rings[i].me = ixgbe_pf_que_index(mode, i);
 		adapter->tx_rings[i].me = ixgbe_pf_que_index(mode, i);
 	}
 #endif
 	/* reprogram the RAR[0] in case user changed it. */
 	ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, IXGBE_RAH_AV);
 
 	/* Get the latest mac address, User can use a LAA */
 	bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
 	ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1);
 	hw->addr_ctrl.rar_used_count = 1;
 
 	/* Set hardware offload abilities from ifnet flags */
 	ixgbe_set_if_hwassist(adapter);
 
 	/* Prepare transmit descriptors and buffers */
 	if (ixgbe_setup_transmit_structures(adapter)) {
 		device_printf(dev, "Could not setup transmit structures\n");
 		ixgbe_stop(adapter);
 		return;
 	}
 
 	ixgbe_init_hw(hw);
 #ifdef PCI_IOV
 	ixgbe_initialize_iov(adapter);
 #endif
 	ixgbe_initialize_transmit_units(adapter);
 
 	/* Setup Multicast table */
 	ixgbe_set_multi(adapter);
 
 	/* Determine the correct mbuf pool, based on frame size */
 	if (adapter->max_frame_size <= MCLBYTES)
 		adapter->rx_mbuf_sz = MCLBYTES;
 	else
 		adapter->rx_mbuf_sz = MJUMPAGESIZE;
 
 	/* Prepare receive descriptors and buffers */
 	if (ixgbe_setup_receive_structures(adapter)) {
 		device_printf(dev, "Could not setup receive structures\n");
 		ixgbe_stop(adapter);
 		return;
 	}
 
 	/* Configure RX settings */
 	ixgbe_initialize_receive_units(adapter);
 
 	/* Enable SDP & MSIX interrupts based on adapter */
 	ixgbe_config_gpie(adapter);
 
 	/* Set MTU size */
 	if (ifp->if_mtu > ETHERMTU) {
 		/* aka IXGBE_MAXFRS on 82599 and newer */
 		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
 		mhadd &= ~IXGBE_MHADD_MFS_MASK;
 		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
 	}
 
 	/* Now enable all the queues */
 	for (int i = 0; i < adapter->num_queues; i++) {
 		txr = &adapter->tx_rings[i];
 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me));
 		txdctl |= IXGBE_TXDCTL_ENABLE;
 		/* Set WTHRESH to 8, burst writeback */
 		txdctl |= (8 << 16);
 		/*
 		 * When the internal queue falls below PTHRESH (32),
 		 * start prefetching as long as there are at least
 		 * HTHRESH (1) buffers ready. The values are taken
 		 * from the Intel linux driver 3.8.21.
 		 * Prefetching enables tx line rate even with 1 queue.
 		 */
 		txdctl |= (32 << 0) | (1 << 8);
 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl);
 	}
 
 	for (int i = 0; i < adapter->num_queues; i++) {
 		rxr = &adapter->rx_rings[i];
 		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me));
 		if (hw->mac.type == ixgbe_mac_82598EB) {
 			/*
 			** PTHRESH = 21
 			** HTHRESH = 4
 			** WTHRESH = 8
 			*/
 			rxdctl &= ~0x3FFFFF;
 			rxdctl |= 0x080420;
 		}
 		rxdctl |= IXGBE_RXDCTL_ENABLE;
 		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl);
 		/*
 		 * Poll up to 10 times, 1 ms apart, for the enable bit to
 		 * read back.  The counter is restarted for every queue so
 		 * that a slow queue cannot use up the polling budget of
 		 * the queues that follow it.
 		 */
 		for (int j = 0; j < 10; j++) {
 			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) &
 			    IXGBE_RXDCTL_ENABLE)
 				break;
 			else
 				msec_delay(1);
 		}
 		wmb();
 #ifdef DEV_NETMAP
 		/*
 		 * In netmap mode, we must preserve the buffers made
 		 * available to userspace before the if_init()
 		 * (this is true by default on the TX side, because
 		 * init makes all buffers available to userspace).
 		 *
 		 * netmap_reset() and the device specific routines
 		 * (e.g. ixgbe_setup_receive_rings()) map these
 		 * buffers at the end of the NIC ring, so here we
 		 * must set the RDT (tail) register to make sure
 		 * they are not overwritten.
 		 *
 		 * In this driver the NIC ring starts at RDH = 0,
 		 * RDT points to the last slot available for reception (?),
 		 * so RDT = num_rx_desc - 1 means the whole ring is available.
 		 */
 		if (ifp->if_capenable & IFCAP_NETMAP) {
 			struct netmap_adapter *na = NA(adapter->ifp);
 			struct netmap_kring *kring = &na->rx_rings[i];
 			int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
 
 			IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), t);
 		} else
 #endif /* DEV_NETMAP */
 		IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), adapter->num_rx_desc - 1);
 	}
 
 	/* Enable Receive engine */
 	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		rxctrl |= IXGBE_RXCTRL_DMBYPS;
 	rxctrl |= IXGBE_RXCTRL_RXEN;
 	ixgbe_enable_rx_dma(hw, rxctrl);
 
 	/* Restart the once-per-second (hz ticks) housekeeping timer */
 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
 
 	/* Set up MSI/X routing */
 	if (ixgbe_enable_msix)  {
 		ixgbe_configure_ivars(adapter);
 		/* Set up auto-mask */
 		if (hw->mac.type == ixgbe_mac_82598EB)
 			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
 		else {
 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
 			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
 		}
 	} else {  /* Simple settings for Legacy/MSI */
 		ixgbe_set_ivar(adapter, 0, 0, 0);
 		ixgbe_set_ivar(adapter, 0, 0, 1);
 		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
 	}
 
 #ifdef IXGBE_FDIR
 	/* Init Flow director */
 	if (hw->mac.type != ixgbe_mac_82598EB) {
 		u32 hdrm = 32 << fdir_pballoc;
 
 		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
 		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
 	}
 #endif
 
 	/*
 	 * Check on any SFP devices that
 	 * need to be kick-started
 	 */
 	if (hw->phy.type == ixgbe_phy_none) {
 		err = hw->phy.ops.identify(hw);
 		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
 			device_printf(dev,
 			    "Unsupported SFP+ module type was detected.\n");
 			return;
 		}
 	}
 
 	/* Set moderation on the Link interrupt */
 	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR);
 
 	/* Configure Energy Efficient Ethernet for supported devices */
 	if (hw->mac.ops.setup_eee) {
 		err = hw->mac.ops.setup_eee(hw, adapter->eee_enabled);
 		if (err)
 			device_printf(dev, "Error setting up EEE: %d\n", err);
 	}
 
 	/* Config/Enable Link */
 	ixgbe_config_link(adapter);
 
 	/* Hardware Packet Buffer & Flow Control setup */
 	ixgbe_config_delay_values(adapter);
 
 	/* Initialize the FC settings */
 	ixgbe_start_hw(hw);
 
 	/* Set up VLAN support and filter */
 	ixgbe_setup_vlan_hw_support(adapter);
 
 	/* Setup DMA Coalescing */
 	ixgbe_config_dmac(adapter);
 
 	/* And now turn on interrupts */
 	ixgbe_enable_intr(adapter);
 
 #ifdef PCI_IOV
 	/* Enable the use of the MBX by the VF's */
 	{
 		u32 reg = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
 		reg |= IXGBE_CTRL_EXT_PFRSTD;
 		IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, reg);
 	}
 #endif
 
 	/* Now inform the stack we're ready */
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 
 	return;
 }
 
 /*
  * Unlocked init entry point: takes the core lock around
  * ixgbe_init_locked().  'arg' is the adapter softc.
  */
 static void
 ixgbe_init(void *arg)
 {
 	struct adapter *sc = (struct adapter *)arg;
 
 	IXGBE_CORE_LOCK(sc);
 	ixgbe_init_locked(sc);
 	IXGBE_CORE_UNLOCK(sc);
 }
 
 /*
  * Program the GPIE register: start from its current value, OR in the
  * SDP interrupt enables that apply to this device and, when more than
  * one MSI-X vector is in use, the enhanced MSI-X mode bits.
  */
 static void
 ixgbe_config_gpie(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 val = IXGBE_READ_REG(hw, IXGBE_GPIE);
 
 	/* 82598AT: fan failure is signalled on SDP1. */
 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
 		val |= IXGBE_SDP1_GPIEN;
 	}
 
 	/* 82599: module detection on SDP2, media ready on SDP1. */
 	if (hw->mac.type == ixgbe_mac_82599EB) {
 		val |= IXGBE_SDP2_GPIEN;
 		if (hw->device_id != IXGBE_DEV_ID_82599_QSFP_SF_QP) {
 			val |= IXGBE_SDP1_GPIEN;
 		}
 	}
 
 	/*
 	 * SDP0: thermal failure detection on X540, link detection on
 	 * X552 SFP+ and X552/X557-AT.
 	 */
 	if (hw->mac.type == ixgbe_mac_X540) {
 		val |= IXGBE_SDP0_GPIEN_X540;
 	} else if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP) {
 		val |= IXGBE_SDP0_GPIEN_X540;
 	} else if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
 		val |= IXGBE_SDP0_GPIEN_X540;
 	}
 
 	/* Enhanced MSI-X mode when several vectors were allocated. */
 	if (adapter->msix > 1) {
 		val |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_EIAME |
 		    IXGBE_GPIE_PBA_SUPPORT | IXGBE_GPIE_OCD;
 	}
 
 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, val);
 }
 
 /*
  * Requires adapter->max_frame_size to be set.
  */
 /*
  * Fill in the flow-control water marks, pause time and requested mode
  * in hw->fc from the maximum frame size and the size of RX packet
  * buffer 0.
  */
 static void
 ixgbe_config_delay_values(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 frame = adapter->max_frame_size;
 	u32 high_dv, low_dv, high_kb, rxpb_kb;
 
 	/* X540-class MACs use their own delay-value formulas. */
 	switch (hw->mac.type) {
 	case ixgbe_mac_X540:
 	case ixgbe_mac_X550:
 	case ixgbe_mac_X550EM_x:
 		high_dv = IXGBE_DV_X540(frame, frame);
 		low_dv = IXGBE_LOW_DV_X540(frame);
 		break;
 	default:
 		high_dv = IXGBE_DV(frame, frame);
 		low_dv = IXGBE_LOW_DV(frame);
 		break;
 	}
 
 	/* High water: packet buffer size minus the delay value. */
 	high_kb = IXGBE_BT2KB(high_dv);
 	rxpb_kb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
 	hw->fc.high_water[0] = rxpb_kb - high_kb;
 
 	/* Low water: the low delay value converted by IXGBE_BT2KB. */
 	hw->fc.low_water[0] = IXGBE_BT2KB(low_dv);
 
 	hw->fc.requested_mode = adapter->fc;
 	hw->fc.pause_time = IXGBE_FC_PAUSE;
 	hw->fc.send_xon = TRUE;
 }
 
 /*
 **
 ** MSIX Interrupt Handlers and Tasklets
 **
 */
 
 /*
  * Unmask the interrupt for one queue vector.
  *
  * On 82598EB the vector bit is written to EIMS (restricted to the
  * RTX queue bits); on other MACs the 64-bit vector mask is split
  * across EIMS_EX(0) (low 32 bits) and EIMS_EX(1) (high 32 bits).
  */
 static inline void
 ixgbe_enable_queue(struct adapter *adapter, u32 vector)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	/*
 	 * Widen before shifting: shifting a plain int by 31 or more is
 	 * undefined and could never produce the upper 32 bits used below.
 	 */
 	u64	queue = (u64)1 << vector;
 	u32	mask;
 
 	if (hw->mac.type == ixgbe_mac_82598EB) {
 		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
 	} else {
 		mask = (queue & 0xFFFFFFFF);
 		if (mask)
 			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
 		mask = (queue >> 32);
 		if (mask)
 			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
 	}
 }
 
 /*
  * Mask the interrupt for one queue vector.
  *
  * On 82598EB the vector bit is written to EIMC (restricted to the
  * RTX queue bits); on other MACs the 64-bit vector mask is split
  * across EIMC_EX(0) (low 32 bits) and EIMC_EX(1) (high 32 bits).
  */
 static inline void
 ixgbe_disable_queue(struct adapter *adapter, u32 vector)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	/*
 	 * Widen before shifting: shifting a plain int by 31 or more is
 	 * undefined and could never produce the upper 32 bits used below.
 	 */
 	u64	queue = (u64)1 << vector;
 	u32	mask;
 
 	if (hw->mac.type == ixgbe_mac_82598EB) {
 		mask = (IXGBE_EIMS_RTX_QUEUE & queue);
 		IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
 	} else {
 		mask = (queue & 0xFFFFFFFF);
 		if (mask)
 			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
 		mask = (queue >> 32);
 		if (mask)
 			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
 	}
 }
 
 /*
  * Deferred (taskqueue) handler for one queue: while the interface is
  * running, process received packets, reclaim completed transmits and
  * restart transmission if packets are waiting; then unmask the
  * interrupt again.  'pending' is not used.
  */
 static void
 ixgbe_handle_que(void *context, int pending)
 {
 	struct ix_queue *que = context;
 	struct adapter  *adapter = que->adapter;
 	struct ifnet    *ifp = adapter->ifp;
 	struct tx_ring  *txr = que->txr;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
 		ixgbe_rxeof(que);
 
 		IXGBE_TX_LOCK(txr);
 		ixgbe_txeof(txr);
 #ifdef IXGBE_LEGACY_TX
 		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 			ixgbe_start_locked(txr, ifp);
 #else
 		if (!drbr_empty(ifp, txr->br))
 			ixgbe_mq_start_locked(ifp, txr);
 #endif
 		IXGBE_TX_UNLOCK(txr);
 	}
 
 	/*
 	 * Reenable this interrupt: without a per-queue resource fall back
 	 * to enabling the adapter's interrupts as a whole.
 	 */
 	if (que->res == NULL)
 		ixgbe_enable_intr(adapter);
 	else
 		ixgbe_enable_queue(adapter, que->msix);
 }
 
 
 /*********************************************************************
  *
  *  Legacy Interrupt Service routine
  *
  **********************************************************************/
 
 /*
  * Interrupt handler used when a single (legacy/MSI) vector serves the
  * adapter.  'arg' is the ix_queue; the first TX ring is serviced.
  */
 static void
 ixgbe_legacy_irq(void *arg)
 {
 	struct ix_queue	*que = arg;
 	struct adapter	*adapter = que->adapter;
 	struct ixgbe_hw	*hw = &adapter->hw;
 	struct ifnet	*ifp = adapter->ifp;
 	struct tx_ring	*txr = adapter->tx_rings;
 	bool		rx_more;
 	u32		cause;
 
 	cause = IXGBE_READ_REG(hw, IXGBE_EICR);
 	que->irqs++;
 
 	/* No cause bits set: just unmask again and leave. */
 	if (cause == 0) {
 		ixgbe_enable_intr(adapter);
 		return;
 	}
 
 	rx_more = ixgbe_rxeof(que);
 
 	IXGBE_TX_LOCK(txr);
 	ixgbe_txeof(txr);
 #ifndef IXGBE_LEGACY_TX
 	if (!drbr_empty(ifp, txr->br))
 		ixgbe_mq_start_locked(ifp, txr);
 #else
 	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		ixgbe_start_locked(txr, ifp);
 #endif
 	IXGBE_TX_UNLOCK(txr);
 
 	/* Fan failure is reported on SDP1 of the 82598AT. */
 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
 		if ((cause & IXGBE_EICR_GPI_SDP1) != 0) {
 			device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
 			    "REPLACE IMMEDIATELY!!\n");
 			IXGBE_WRITE_REG(hw, IXGBE_EIMS,
 			    IXGBE_EICR_GPI_SDP1_BY_MAC(hw));
 		}
 	}
 
 	/* Link state changed: let the link task sort it out. */
 	if ((cause & IXGBE_EICR_LSC) != 0)
 		taskqueue_enqueue(adapter->tq, &adapter->link_task);
 
 	/* External PHY raised its interrupt (X550EM_X 10G-T only). */
 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
 		if ((cause & IXGBE_EICR_GPI_SDP0_X540) != 0)
 			taskqueue_enqueue(adapter->tq, &adapter->phy_task);
 	}
 
 	/* More RX work goes to the queue task, otherwise unmask. */
 	if (!rx_more) {
 		ixgbe_enable_intr(adapter);
 		return;
 	}
 	taskqueue_enqueue(que->tq, &que->que_task);
 }
 
 
 /*********************************************************************
  *
  *  MSIX Queue Interrupt Service routine
  *
  **********************************************************************/
 void
 ixgbe_msix_que(void *arg)
 {
 	struct ix_queue	*que = arg;
 	struct adapter  *adapter = que->adapter;
 	struct ifnet    *ifp = adapter->ifp;
 	struct tx_ring	*txr = que->txr;
 	struct rx_ring	*rxr = que->rxr;
 	bool		more;
 	u32		newitr = 0;
 
 
 	/* Protect against spurious interrupts */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
 		return;
 
 	ixgbe_disable_queue(adapter, que->msix);
 	++que->irqs;
 
 	more = ixgbe_rxeof(que);
 
 	IXGBE_TX_LOCK(txr);
 	ixgbe_txeof(txr);
 #ifdef IXGBE_LEGACY_TX
 	if (!IFQ_DRV_IS_EMPTY(ifp->if_snd))
 		ixgbe_start_locked(txr, ifp);
 #else
 	if (!drbr_empty(ifp, txr->br))
 		ixgbe_mq_start_locked(ifp, txr);
 #endif
 	IXGBE_TX_UNLOCK(txr);
 
 	/* Do AIM now? */
 
 	if (ixgbe_enable_aim == FALSE)
 		goto no_calc;
 	/*
 	** Do Adaptive Interrupt Moderation:
         **  - Write out last calculated setting
 	**  - Calculate based on average size over
 	**    the last interval.
 	*/
         if (que->eitr_setting)
                 IXGBE_WRITE_REG(&adapter->hw,
                     IXGBE_EITR(que->msix), que->eitr_setting);
  
         que->eitr_setting = 0;
 
         /* Idle, do nothing */
         if ((txr->bytes == 0) && (rxr->bytes == 0))
                 goto no_calc;
                                 
 	if ((txr->bytes) && (txr->packets))
                	newitr = txr->bytes/txr->packets;
 	if ((rxr->bytes) && (rxr->packets))
 		newitr = max(newitr,
 		    (rxr->bytes / rxr->packets));
 	newitr += 24; /* account for hardware frame, crc */
 
 	/* set an upper boundary */
 	newitr = min(newitr, 3000);
 
 	/* Be nice to the mid range */
 	if ((newitr > 300) && (newitr < 1200))
 		newitr = (newitr / 3);
 	else
 		newitr = (newitr / 2);
 
         if (adapter->hw.mac.type == ixgbe_mac_82598EB)
                 newitr |= newitr << 16;
         else
                 newitr |= IXGBE_EITR_CNT_WDIS;
                  
         /* save for next interrupt */
         que->eitr_setting = newitr;
 
         /* Reset state */
         txr->bytes = 0;
         txr->packets = 0;
         rxr->bytes = 0;
         rxr->packets = 0;
 
 no_calc:
 	if (more)
 		taskqueue_enqueue(que->tq, &que->que_task);
 	else
 		ixgbe_enable_queue(adapter, que->msix);
 	return;
 }
 
 
 /*
  * MSI-X handler for the link/"other" vector.  'arg' is the adapter.
  *
  * Masks the "other" causes, reads and acknowledges the pending cause
  * bits (queue bits excluded), hands each recognised cause to its
  * taskqueue task or prints a critical message, then unmasks the
  * "other" causes again.
  */
 static void
 ixgbe_msix_link(void *arg)
 {
 	struct adapter	*adapter = arg;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32		reg_eicr, mod_mask;
 
 	++adapter->link_irq;
 
 	/* Pause other interrupts */
 	IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_OTHER);
 
 	/*
 	 * First get the cause.
 	 * NOTE(review): the value is read from IXGBE_EICS rather than
 	 * IXGBE_EICR - confirm this is intentional.
 	 */
 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
 	/* Be sure the queue bits are not cleared */
 	reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
 	/* Clear interrupt with write */
 	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
 
 	/* Link status change */
 	if (reg_eicr & IXGBE_EICR_LSC) {
 		/* Mask LSC and let the link task handle the change */
 		IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC);
 		taskqueue_enqueue(adapter->tq, &adapter->link_task);
 	}
 
 	/* The causes in this section are not checked on 82598EB */
 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
 #ifdef IXGBE_FDIR
 		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
 			/* This is probably overkill :) */
 			/*
 			 * Only one reinit may be outstanding at a time.
 			 * NOTE(review): this early return skips the
 			 * "Re-enable other interrupts" write at the end of
 			 * the function - confirm that is intended.
 			 */
 			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
 				return;
 			/* Disable the interrupt */
 			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
 			taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
 		} else
 #endif
 		/* With IXGBE_FDIR this is the else-branch of the test above */
 		if (reg_eicr & IXGBE_EICR_ECC) {
 			device_printf(adapter->dev, "CRITICAL: ECC ERROR!! "
 			    "Please Reboot!!\n");
 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
 		}
 
 		/* Check for over temp condition */
 		if (reg_eicr & IXGBE_EICR_TS) {
 			device_printf(adapter->dev, "CRITICAL: OVER TEMP!! "
 			    "PHY IS SHUT DOWN!!\n");
 			device_printf(adapter->dev, "System shutdown required!\n");
 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
 		}
 #ifdef PCI_IOV
 		/* VF mailbox activity is handled by the mailbox task */
 		if (reg_eicr & IXGBE_EICR_MAILBOX)
 			taskqueue_enqueue(adapter->tq, &adapter->mbx_task);
 #endif
 	}
 
 	/* Pluggable optics-related interrupt */
 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
 		mod_mask = IXGBE_EICR_GPI_SDP0_X540;
 	else
 		mod_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw);
 
 	if (ixgbe_is_sfp(hw)) {
 		/* SDP1 goes to the msf task, the module bit to the mod task */
 		if (reg_eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw)) {
 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw));
 			taskqueue_enqueue(adapter->tq, &adapter->msf_task);
 		} else if (reg_eicr & mod_mask) {
 			IXGBE_WRITE_REG(hw, IXGBE_EICR, mod_mask);
 			taskqueue_enqueue(adapter->tq, &adapter->mod_task);
 		}
 	}
 
 	/* Check for fan failure */
 	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
                 device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
 		    "REPLACE IMMEDIATELY!!\n");
 	}
 
 	/* External PHY interrupt */
 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T &&
 	    (reg_eicr & IXGBE_EICR_GPI_SDP0_X540)) {
 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540);
 		taskqueue_enqueue(adapter->tq, &adapter->phy_task);
 	}
 
 	/* Re-enable other interrupts */
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
 	return;
 }
 
 /*********************************************************************
  *
  *  Media Ioctl callback
  *
  *  This routine is called whenever the user queries the status of
  *  the interface using ifconfig.
  *
  **********************************************************************/
 static void
 ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
 {
 	struct adapter *adapter = ifp->if_softc;
 	struct ixgbe_hw *hw = &adapter->hw;
 	int layer;
 
 	INIT_DEBUGOUT("ixgbe_media_status: begin");
 	IXGBE_CORE_LOCK(adapter);
 	ixgbe_update_link_status(adapter);
 
 	ifmr->ifm_status = IFM_AVALID;
 	ifmr->ifm_active = IFM_ETHER;
 
 	if (!adapter->link_active) {
 		IXGBE_CORE_UNLOCK(adapter);
 		return;
 	}
 
 	ifmr->ifm_status |= IFM_ACTIVE;
 	layer = adapter->phy_layer;
 
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T ||
 	    layer & IXGBE_PHYSICAL_LAYER_1000BASE_T ||
 	    layer & IXGBE_PHYSICAL_LAYER_100BASE_TX)
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_1GB_FULL:
 			ifmr->ifm_active |= IFM_1000_T | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_100_FULL:
 			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
 			break;
 		}
 	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU ||
 	    layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX;
 			break;
 		}
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR)
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			ifmr->ifm_active |= IFM_10G_LR | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_1GB_FULL:
 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
 			break;
 		}
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM)
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_1GB_FULL:
 			ifmr->ifm_active |= IFM_1000_LX | IFM_FDX;
 			break;
 		}
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR ||
 	    layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_1GB_FULL:
 			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
 			break;
 		}
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4)
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
 			break;
 		}
 	/*
 	** XXX: These need to use the proper media types once
 	** they're added.
 	*/
 #ifndef IFM_ETH_XTYPE
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR)
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_2_5GB_FULL:
 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_1GB_FULL:
 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
 			break;
 		}
 	else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4
 	    || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_2_5GB_FULL:
 			ifmr->ifm_active |= IFM_2500_SX | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_1GB_FULL:
 			ifmr->ifm_active |= IFM_1000_CX | IFM_FDX;
 			break;
 		}
 #else
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR)
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			ifmr->ifm_active |= IFM_10G_KR | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_2_5GB_FULL:
 			ifmr->ifm_active |= IFM_2500_KX | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_1GB_FULL:
 			ifmr->ifm_active |= IFM_1000_KX | IFM_FDX;
 			break;
 		}
 	else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4
 	    || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX)
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
 			ifmr->ifm_active |= IFM_10G_KX4 | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_2_5GB_FULL:
 			ifmr->ifm_active |= IFM_2500_KX | IFM_FDX;
 			break;
 		case IXGBE_LINK_SPEED_1GB_FULL:
 			ifmr->ifm_active |= IFM_1000_KX | IFM_FDX;
 			break;
 		}
 #endif
 	
 	/* If nothing is recognized... */
 	if (IFM_SUBTYPE(ifmr->ifm_active) == 0)
 		ifmr->ifm_active |= IFM_UNKNOWN;
 	
 #if __FreeBSD_version >= 900025
 	/* Display current flow control setting used on link */
 	if (hw->fc.current_mode == ixgbe_fc_rx_pause ||
 	    hw->fc.current_mode == ixgbe_fc_full)
 		ifmr->ifm_active |= IFM_ETH_RXPAUSE;
 	if (hw->fc.current_mode == ixgbe_fc_tx_pause ||
 	    hw->fc.current_mode == ixgbe_fc_full)
 		ifmr->ifm_active |= IFM_ETH_TXPAUSE;
 #endif
 
 	IXGBE_CORE_UNLOCK(adapter);
 
 	return;
 }
 
 /*********************************************************************
  *
  *  Media Ioctl callback
  *
  *  This routine is called when the user changes speed/duplex using
  *  media/mediaopt option with ifconfig.
  *
  **********************************************************************/
 /*
  * Apply the media selected through ifmedia.
  *
  * The requested subtype is translated into a set of link speeds to
  * advertise, the link is set up with that set, and the selection is
  * recorded in adapter->advertise.
  *
  * Returns 0 on success, EINVAL if the media is not Ethernet or the
  * subtype is not handled, and ENODEV for backplane media.
  */
 static int
 ixgbe_media_change(struct ifnet * ifp)
 {
 	struct adapter *adapter = ifp->if_softc;
 	struct ifmedia *ifm = &adapter->media;
 	struct ixgbe_hw *hw = &adapter->hw;
 	ixgbe_link_speed speed = 0;
 
 	INIT_DEBUGOUT("ixgbe_media_change: begin");
 
 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
 		return (EINVAL);
 
 	if (hw->phy.media_type == ixgbe_media_type_backplane)
 		return (ENODEV);
 
 	/*
 	** We don't actually need to check against the supported
 	** media types of the adapter; ifmedia will take care of
 	** that for us.
 	**
 	** The cases below fall through on purpose so that a faster
 	** media type also advertises the slower speeds listed after it.
 	*/
 #ifndef IFM_ETH_XTYPE
 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
 		case IFM_AUTO:
 		case IFM_10G_T:
 			speed |= IXGBE_LINK_SPEED_100_FULL;
 			/* FALLTHROUGH */
 		case IFM_10G_LRM:
 		case IFM_10G_SR: /* KR, too */
 		case IFM_10G_LR:
 		case IFM_10G_CX4: /* KX4 */
 			speed |= IXGBE_LINK_SPEED_1GB_FULL;
 			/* FALLTHROUGH */
 		case IFM_10G_TWINAX:
 			speed |= IXGBE_LINK_SPEED_10GB_FULL;
 			break;
 		case IFM_1000_T:
 			speed |= IXGBE_LINK_SPEED_100_FULL;
 			/* FALLTHROUGH */
 		case IFM_1000_LX:
 		case IFM_1000_SX:
 		case IFM_1000_CX: /* KX */
 			speed |= IXGBE_LINK_SPEED_1GB_FULL;
 			break;
 		case IFM_100_TX:
 			speed |= IXGBE_LINK_SPEED_100_FULL;
 			break;
 		default:
 			goto invalid;
 	}
 #else
 	switch (IFM_SUBTYPE(ifm->ifm_media)) {
 		case IFM_AUTO:
 		case IFM_10G_T:
 			speed |= IXGBE_LINK_SPEED_100_FULL;
 			/* FALLTHROUGH */
 		case IFM_10G_LRM:
 		case IFM_10G_KR:
 		case IFM_10G_LR:
 		case IFM_10G_KX4:
 			speed |= IXGBE_LINK_SPEED_1GB_FULL;
 			/* FALLTHROUGH */
 		case IFM_10G_TWINAX:
 			speed |= IXGBE_LINK_SPEED_10GB_FULL;
 			break;
 		case IFM_1000_T:
 			speed |= IXGBE_LINK_SPEED_100_FULL;
 			/* FALLTHROUGH */
 		case IFM_1000_LX:
 		case IFM_1000_SX:
 		case IFM_1000_KX:
 			speed |= IXGBE_LINK_SPEED_1GB_FULL;
 			break;
 		case IFM_100_TX:
 			speed |= IXGBE_LINK_SPEED_100_FULL;
 			break;
 		default:
 			goto invalid;
 	}
 #endif
 
 	hw->mac.autotry_restart = TRUE;
 	hw->mac.ops.setup_link(hw, speed, TRUE);
 	/*
 	 * Record one flag per selected speed: bit 2 for 10G, bit 1 for
 	 * 1G, bit 0 for 100M.  Each speed flag is tested and mapped
 	 * explicitly; shifting the raw IXGBE_LINK_SPEED_* mask bits does
 	 * not land them on these positions.
 	 * NOTE(review): assumes adapter->advertise uses the 0x4/0x2/0x1
 	 * encoding - confirm against the advertise_speed sysctl handler.
 	 */
 	adapter->advertise =
 	    ((speed & IXGBE_LINK_SPEED_10GB_FULL) ? 4 : 0) |
 	    ((speed & IXGBE_LINK_SPEED_1GB_FULL) ? 2 : 0) |
 	    ((speed & IXGBE_LINK_SPEED_100_FULL) ? 1 : 0);
 
 	return (0);
 
 invalid:
 	device_printf(adapter->dev, "Invalid media type!\n");
 	return (EINVAL);
 }
 
 /*
  * Update the unicast/multicast promiscuous bits in FCTRL from the
  * interface flags.  UPE is always cleared first, and MPE is cleared
  * when the multicast list fits in the filter; the bits are then set
  * again as IFF_PROMISC / IFF_ALLMULTI require.
  */
 static void
 ixgbe_set_promisc(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	u_int32_t	fctrl;
 	int		naddrs = 0;
 
 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
 	fctrl &= ~IXGBE_FCTRL_UPE;
 
 	if ((ifp->if_flags & IFF_ALLMULTI) != 0) {
 		/* Treat all-multicast as a full filter table. */
 		naddrs = MAX_NUM_MULTICAST_ADDRESSES;
 	} else {
 		struct ifmultiaddr *ifma;
 
 		/* Count link-layer multicast addresses, capped at MAX. */
 #if __FreeBSD_version >= 800000
 		if_maddr_rlock(ifp);
 #else
 		IF_ADDR_LOCK(ifp);
 #endif
 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 			if (ifma->ifma_addr->sa_family == AF_LINK) {
 				if (naddrs == MAX_NUM_MULTICAST_ADDRESSES)
 					break;
 				naddrs++;
 			}
 		}
 #if __FreeBSD_version >= 800000
 		if_maddr_runlock(ifp);
 #else
 		IF_ADDR_UNLOCK(ifp);
 #endif
 	}
 
 	if (naddrs < MAX_NUM_MULTICAST_ADDRESSES)
 		fctrl &= ~IXGBE_FCTRL_MPE;
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
 
 	/* Second pass: turn on whatever the interface flags ask for. */
 	if ((ifp->if_flags & IFF_PROMISC) != 0) {
 		fctrl |= IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE;
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
 	} else if ((ifp->if_flags & IFF_ALLMULTI) != 0) {
 		fctrl |= IXGBE_FCTRL_MPE;
 		fctrl &= ~IXGBE_FCTRL_UPE;
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
 	}
 }
 
 
 /*********************************************************************
  *  Multicast Update
  *
  *  This routine is called whenever multicast address list is updated.
  *
  **********************************************************************/
 #define IXGBE_RAR_ENTRIES 16
 
 /*
  * Rebuild the multicast filter: copy the interface's link-layer
  * multicast addresses into adapter->mta, program the promiscuous bits
  * of FCTRL to match the interface flags and, when the list fits, hand
  * the array to the shared code.
  */
 static void
 ixgbe_set_multi(struct adapter *adapter)
 {
 	struct ifnet		*ifp = adapter->ifp;
 	struct ixgbe_mc_addr	*mta = adapter->mta;
 	struct ifmultiaddr	*ifma;
 	u32			fctrl;
 	int			naddrs = 0;
 
 	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
 
 	bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES);
 
 	/* Snapshot the multicast list under the address lock. */
 #if __FreeBSD_version >= 800000
 	if_maddr_rlock(ifp);
 #else
 	IF_ADDR_LOCK(ifp);
 #endif
 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
 		if (ifma->ifma_addr->sa_family == AF_LINK) {
 			if (naddrs == MAX_NUM_MULTICAST_ADDRESSES)
 				break;
 			bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
 			    mta[naddrs].addr, IXGBE_ETH_LENGTH_OF_ADDRESS);
 			mta[naddrs].vmdq = adapter->pool;
 			naddrs++;
 		}
 	}
 #if __FreeBSD_version >= 800000
 	if_maddr_runlock(ifp);
 #else
 	IF_ADDR_UNLOCK(ifp);
 #endif
 
 	/*
 	 * Every branch below fixes the final state of both UPE and MPE,
 	 * so no preset of the two bits is needed.
 	 */
 	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
 	if ((ifp->if_flags & IFF_PROMISC) != 0) {
 		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
 	} else if (naddrs >= MAX_NUM_MULTICAST_ADDRESSES ||
 	    (ifp->if_flags & IFF_ALLMULTI) != 0) {
 		fctrl |= IXGBE_FCTRL_MPE;
 		fctrl &= ~IXGBE_FCTRL_UPE;
 	} else {
 		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
 	}
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
 
 	/* Only program the address list when it was not truncated. */
 	if (naddrs < MAX_NUM_MULTICAST_ADDRESSES) {
 		ixgbe_update_mc_addr_list(&adapter->hw, (u8 *)mta, naddrs,
 		    ixgbe_mc_array_itr, TRUE);
 	}
 }
 
 /*
  * This is an iterator function now needed by the multicast
  * shared code. It simply feeds the shared code routine the
  * addresses in the array of ixgbe_set_multi() one by one.
  */
 static u8 *
 ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
 {
 	/* The cursor points at the current ixgbe_mc_addr entry. */
 	struct ixgbe_mc_addr *entry = (struct ixgbe_mc_addr *)*update_ptr;
 
 	/* Report this entry's pool and step the cursor to the next one. */
 	*vmdq = entry->vmdq;
 	*update_ptr = (u8 *)&entry[1];
 
 	return (entry->addr);
 }
 
 
 /*********************************************************************
  *  Timer routine
  *
  *  This routine checks for link status, updates statistics,
  *  and runs the watchdog check.
  *
  **********************************************************************/
 
 /*
  * Periodic housekeeping callout.  'arg' is the adapter; the core mutex
  * must be held (asserted below).
  *
  * Probes for a pluggable module if one is still awaited, refreshes link
  * state and statistics, and scans the queues for hung transmitters.
  * Unless every queue is counted as hung the callout re-arms itself for
  * hz ticks later; otherwise the interface is marked not running and
  * ixgbe_init_locked() is called to reset it.
  */
 static void
 ixgbe_local_timer(void *arg)
 {
 	struct adapter	*adapter = arg;
 	device_t	dev = adapter->dev;
 	struct ix_queue *que = adapter->queues;
 	u64		queues = 0;	/* bitmask of queues with TX work */
 	int		hung = 0;	/* number of queues counted as hung */
 
 	mtx_assert(&adapter->core_mtx, MA_OWNED);
 
 	/* Check for pluggable optics */
 	if (adapter->sfp_probe)
 		if (!ixgbe_sfp_probe(adapter))
 			goto out; /* Nothing to do */
 
 	ixgbe_update_link_status(adapter);
 	ixgbe_update_stats_counters(adapter);
 
 	/*
 	** Check the TX queues status
 	**	- mark hung queues so we don't schedule on them
 	**      - watchdog only if all queues show hung
 	**
 	** NOTE(review): the work flag and the hung marking below use
 	** que->txr->busy, while the two comparisons read que->busy;
 	** confirm that both fields are meant to be used here.
 	*/
 	for (int i = 0; i < adapter->num_queues; i++, que++) {
 		/* Keep track of queues with work for soft irq */
 		if (que->txr->busy)
 			queues |= ((u64)1 << que->me);
 		/*
 		** Each time txeof runs without cleaning, but there
 		** are uncleaned descriptors it increments busy. If
 		** we get to the MAX we declare it hung.
 		*/
 		if (que->busy == IXGBE_QUEUE_HUNG) {
 			++hung;
 			/* Mark the queue as inactive */
 			adapter->active_queues &= ~((u64)1 << que->me);
 			continue;
 		} else {
 			/* Check if we've come back from hung */
 			if ((adapter->active_queues & ((u64)1 << que->me)) == 0)
                                 adapter->active_queues |= ((u64)1 << que->me);
 		}
 		/* Reached the busy limit: warn and mark the ring hung */
 		if (que->busy >= IXGBE_MAX_TX_BUSY) {
 			device_printf(dev,"Warning queue %d "
 			    "appears to be hung!\n", i);
 			que->txr->busy = IXGBE_QUEUE_HUNG;
 			++hung;
 		}
 
 	}
 
 	/* Only truly watchdog if all queues show hung */
 	if (hung == adapter->num_queues)
 		goto watchdog;
 	else if (queues != 0) { /* Force an IRQ on queues with work */
 		ixgbe_rearm_queues(adapter, queues);
 	}
 
 out:
 	/* Run again in hz ticks */
 	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
 	return;
 
 watchdog:
 	/* The callout is not re-armed here; ixgbe_init_locked() does that */
 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
 	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 	adapter->watchdog_events++;
 	ixgbe_init_locked(adapter);
 }
 
 
 /*
 ** Report the link state held in the softc to the OS.
 **
 ** Note: nothing here touches the hardware; the real check of the
 **	hardware only happens with a link interrupt.  A transition is
 **	reported only when adapter->link_up disagrees with what was
 **	last announced (adapter->link_active).
 */
 static void
 ixgbe_update_link_status(struct adapter *adapter)
 {
 	struct ifnet	*ifp = adapter->ifp;
 	device_t	dev = adapter->dev;
 
 	if (!adapter->link_up) {
 		/* Link down: only act if we had announced it as up. */
 		if (adapter->link_active != TRUE)
 			return;
 
 		if (bootverbose)
 			device_printf(dev,"Link is Down\n");
 		if_link_state_change(ifp, LINK_STATE_DOWN);
 		adapter->link_active = FALSE;
 #ifdef PCI_IOV
 		ixgbe_ping_all_vfs(adapter);
 #endif
 		return;
 	}
 
 	/* Link up: nothing to do if that was already announced. */
 	if (adapter->link_active != FALSE)
 		return;
 
 	if (bootverbose) {
 		int gbps = (adapter->link_speed == 128) ? 10 : 1;
 
 		device_printf(dev,"Link is up %d Gbps %s \n",
 		    gbps, "Full Duplex");
 	}
 	adapter->link_active = TRUE;
 	/* Pick up any flow control changes */
 	ixgbe_fc_enable(&adapter->hw);
 	/* Refresh the DMA coalescing configuration */
 	ixgbe_config_dmac(adapter);
 	if_link_state_change(ifp, LINK_STATE_UP);
 #ifdef PCI_IOV
 	ixgbe_ping_all_vfs(adapter);
 #endif
 }
 
 
 /*********************************************************************
  *
  *  Quiesce the adapter: interrupts are masked, the timer callout is
  *  stopped, the MAC is reset and stopped, and the stack is told the
  *  link is down.  Must be called with the core mutex held.
  *
  **********************************************************************/
 
 static void
 ixgbe_stop(void *arg)
 {
 	struct adapter	*adapter = arg;
 	struct ifnet	*ifp = adapter->ifp;
 	struct ixgbe_hw	*hw = &adapter->hw;
 
 	mtx_assert(&adapter->core_mtx, MA_OWNED);
 
 	INIT_DEBUGOUT("ixgbe_stop: begin\n");
 	ixgbe_disable_intr(adapter);
 	callout_stop(&adapter->timer);
 
 	/* Tell the stack the interface is no longer running. */
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 
 	ixgbe_reset_hw(hw);
 	/* The stopped flag is cleared before asking the shared code to stop. */
 	hw->adapter_stopped = FALSE;
 	ixgbe_stop_adapter(hw);
 	if (hw->mac.type == ixgbe_mac_82599EB)
 		ixgbe_stop_mac_link_on_d3_82599(hw);
 
 	/* Laser off; this is a no-op when there are no optics. */
 	ixgbe_disable_tx_laser(hw);
 
 	/* Push the link-down state up to the stack. */
 	adapter->link_up = FALSE;
 	ixgbe_update_link_status(adapter);
 
 	/* Rewrite RAR[0] in case the user changed the address. */
 	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
 }
 
 
 /*********************************************************************
  *
  *  Record the board's PCI identity, enable bus mastering and pick
  *  the per-MAC scatter/gather segment count.
  *
  **********************************************************************/
 static void
 ixgbe_identify_hardware(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	device_t	dev = adapter->dev;
 
 	/* Capture the PCI IDs of this board */
 	hw->vendor_id = pci_get_vendor(dev);
 	hw->device_id = pci_get_device(dev);
 	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
 	hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2);
 	hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2);
 
 	/* BUSMASTER has to be set */
 	pci_enable_busmaster(dev);
 
 	/* The MAC type must be known before num_segs can be chosen */
 	ixgbe_set_mac_type(hw);
 
 	if (hw->mac.type == ixgbe_mac_82598EB) {
 		adapter->num_segs = IXGBE_82598_SCATTER;
 		return;
 	}
 
 	/* Every other MAC picks up the 82599 settings */
 	hw->phy.smart_speed = ixgbe_smart_speed;
 	adapter->num_segs = IXGBE_82599_SCATTER;
 }
 
 /*********************************************************************
  *
  *  Determine optic type
  *
  *  The supported physical layer mask is cached in adapter->phy_layer
  *  and the first matching entry below, in priority order, selects
  *  adapter->optics.  With no match the default IFM_ETHER | IFM_AUTO
  *  is used.
  *
  **********************************************************************/
 static void
 ixgbe_setup_optics(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	int		layer;
 
 	layer = adapter->phy_layer = ixgbe_get_supported_physical_layer(hw);
 
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T)
 		adapter->optics = IFM_10G_T;
 	else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T)
 		adapter->optics = IFM_1000_T;
 	else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX)
 		adapter->optics = IFM_1000_SX;
 	else if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
 	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM))
 		adapter->optics = IFM_10G_LR;
 	else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR)
 		adapter->optics = IFM_10G_SR;
 	else if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU)
 		adapter->optics = IFM_10G_TWINAX;
 	else if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
 	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4))
 		adapter->optics = IFM_10G_CX4;
 	else
 		/* Nothing matched: fall back to the default */
 		adapter->optics = IFM_ETHER | IFM_AUTO;
 }
 
 /*********************************************************************
  *
  *  Setup the Legacy or MSI Interrupt handler
  *
  *  A single IRQ resource is allocated (rid 1 when adapter->msix == 1,
  *  otherwise rid 0), the queue and link taskqueues are created, and
  *  ixgbe_legacy_irq is registered as the handler for the first queue.
  *
  *  Returns 0 on success, ENXIO if the IRQ resource cannot be
  *  allocated, or the bus_setup_intr() error code.
  *
  **********************************************************************/
 static int
 ixgbe_allocate_legacy(struct adapter *adapter)
 {
 	device_t	dev = adapter->dev;
 	struct		ix_queue *que = adapter->queues;
 #ifndef IXGBE_LEGACY_TX
 	struct tx_ring		*txr = adapter->tx_rings;
 #endif
 	int		error, rid = 0;
 
 	/* MSI RID at 1 */
 	if (adapter->msix == 1)
 		rid = 1;
 
 	/* We allocate a single interrupt resource */
 	adapter->res = bus_alloc_resource_any(dev,
             SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (adapter->res == NULL) {
 		device_printf(dev, "Unable to allocate bus resource: "
 		    "interrupt\n");
 		return (ENXIO);
 	}
 
 	/*
 	 * Try allocating a fast interrupt and the associated deferred
 	 * processing contexts.
 	 *
 	 * NOTE(review): both taskqueues below are created with M_NOWAIT
 	 * and the returned pointers are used without a NULL check --
 	 * confirm whether creation can fail at this point.
 	 */
 #ifndef IXGBE_LEGACY_TX
 	TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
 #endif
 	TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
 	que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
             taskqueue_thread_enqueue, &que->tq);
 	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
             device_get_nameunit(adapter->dev));
 
 	/* Tasklets for Link, SFP and Multispeed Fiber */
 	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
 	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
 	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
 	TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter);
 #ifdef IXGBE_FDIR
 	TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
 #endif
 	adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
 	    taskqueue_thread_enqueue, &adapter->tq);
 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
 	    device_get_nameunit(adapter->dev));
 
 	/*
 	 * Register the handler.  On failure both taskqueues are freed
 	 * here; adapter->res is left allocated for the caller's cleanup.
 	 */
 	if ((error = bus_setup_intr(dev, adapter->res,
             INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
             que, &adapter->tag)) != 0) {
 		device_printf(dev, "Failed to register fast interrupt "
 		    "handler: %d\n", error);
 		taskqueue_free(que->tq);
 		taskqueue_free(adapter->tq);
 		que->tq = NULL;
 		adapter->tq = NULL;
 		return (error);
 	}
 	/* For simplicity in the handlers */
 	adapter->active_queues = IXGBE_EIMS_ENABLE_MASK;
 
 	return (0);
 }
 
 
 /*********************************************************************
  *
  *  Setup MSIX Interrupt resources and handlers 
  *
  **********************************************************************/
 static int
 ixgbe_allocate_msix(struct adapter *adapter)
 {
 	device_t        dev = adapter->dev;
 	struct 		ix_queue *que = adapter->queues;
 	struct  	tx_ring *txr = adapter->tx_rings;
 	int 		error, rid, vector = 0;
 	int		cpu_id = 0;
 #ifdef	RSS
 	cpuset_t	cpu_mask;
 #endif
 
 #ifdef	RSS
 	/*
 	 * If we're doing RSS, the number of queues needs to
 	 * match the number of RSS buckets that are configured.
 	 *
 	 * + If there's more queues than RSS buckets, we'll end
 	 *   up with queues that get no traffic.
 	 *
 	 * + If there's more RSS buckets than queues, we'll end
 	 *   up having multiple RSS buckets map to the same queue,
 	 *   so there'll be some contention.
 	 */
 	if (adapter->num_queues != rss_getnumbuckets()) {
 		device_printf(dev,
 		    "%s: number of queues (%d) != number of RSS buckets (%d)"
 		    "; performance will be impacted.\n",
 		    __func__,
 		    adapter->num_queues,
 		    rss_getnumbuckets());
 	}
 #endif
 
 	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
 		rid = vector + 1;
 		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
 		    RF_SHAREABLE | RF_ACTIVE);
 		if (que->res == NULL) {
 			device_printf(dev,"Unable to allocate"
 		    	    " bus resource: que interrupt [%d]\n", vector);
 			return (ENXIO);
 		}
 		/* Set the handler function */
 		error = bus_setup_intr(dev, que->res,
 		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
 		    ixgbe_msix_que, que, &que->tag);
 		if (error) {
 			que->res = NULL;
 			device_printf(dev, "Failed to register QUE handler");
 			return (error);
 		}
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, que->res, que->tag, "q%d", i);
 #endif
 		que->msix = vector;
 		adapter->active_queues |= (u64)(1 << que->msix);
 #ifdef	RSS
 		/*
 		 * The queue ID is used as the RSS layer bucket ID.
 		 * We look up the queue ID -> RSS CPU ID and select
 		 * that.
 		 */
 		cpu_id = rss_getcpu(i % rss_getnumbuckets());
 #else
 		/*
 		 * Bind the msix vector, and thus the
 		 * rings to the corresponding cpu.
 		 *
 		 * This just happens to match the default RSS round-robin
 		 * bucket -> queue -> CPU allocation.
 		 */
 		if (adapter->num_queues > 1)
 			cpu_id = i;
 #endif
 		if (adapter->num_queues > 1)
 			bus_bind_intr(dev, que->res, cpu_id);
 #ifdef IXGBE_DEBUG
 #ifdef	RSS
 		device_printf(dev,
 		    "Bound RSS bucket %d to CPU %d\n",
 		    i, cpu_id);
 #else
 		device_printf(dev,
 		    "Bound queue %d to cpu %d\n",
 		    i, cpu_id);
 #endif
 #endif /* IXGBE_DEBUG */
 
 
 #ifndef IXGBE_LEGACY_TX
 		TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
 #endif
 		TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
 		que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
 		    taskqueue_thread_enqueue, &que->tq);
 #ifdef	RSS
 		CPU_SETOF(cpu_id, &cpu_mask);
 		taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
 		    &cpu_mask,
 		    "%s (bucket %d)",
 		    device_get_nameunit(adapter->dev),
 		    cpu_id);
 #else
 		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s:q%d",
 		    device_get_nameunit(adapter->dev), i);
 #endif
 	}
 
 	/* and Link */
 	rid = vector + 1;
 	adapter->res = bus_alloc_resource_any(dev,
     	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (!adapter->res) {
 		device_printf(dev,"Unable to allocate"
     	    " bus resource: Link interrupt [%d]\n", rid);
 		return (ENXIO);
 	}
 	/* Set the link handler function */
 	error = bus_setup_intr(dev, adapter->res,
 	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
 	    ixgbe_msix_link, adapter, &adapter->tag);
 	if (error) {
 		adapter->res = NULL;
 		device_printf(dev, "Failed to register LINK handler");
 		return (error);
 	}
 #if __FreeBSD_version >= 800504
 	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
 #endif
 	adapter->vector = vector;
 	/* Tasklets for Link, SFP and Multispeed Fiber */
 	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
 	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
 	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
 #ifdef PCI_IOV
 	TASK_INIT(&adapter->mbx_task, 0, ixgbe_handle_mbx, adapter);
 #endif
 	TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter);
 #ifdef IXGBE_FDIR
 	TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
 #endif
 	adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
 	    taskqueue_thread_enqueue, &adapter->tq);
 	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
 	    device_get_nameunit(adapter->dev));
 
 	return (0);
 }
 
 /*
  * Choose the interrupt scheme: MSI-X when it is enabled and enough
  * vectors can be had, otherwise a single MSI, otherwise legacy.
  *
  * Returns the number of vectors allocated (queues + 1 for MSI-X,
  * 1 for MSI) or 0 when falling back to a legacy interrupt.  On
  * MSI-X success adapter->num_queues is updated; ixgbe_num_queues is
  * rewritten with the value in use once the queue count is chosen.
  */
 static int
 ixgbe_setup_msix(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	int rid, want, queues, msgs;
 
 	/* The tunable can rule MSI-X out entirely */
 	if (ixgbe_enable_msix == 0)
 		goto msi;
 
 	/* MSI-X is the first choice */
 	msgs = pci_msix_count(dev);
 	if (msgs == 0)
 		goto msi;
 
 	rid = PCIR_BAR(MSIX_82598_BAR);
 	adapter->msix_mem = bus_alloc_resource_any(dev,
 	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
 	if (adapter->msix_mem == NULL) {
 		/* 82599 maps in higher BAR */
 		rid += 4;
 		adapter->msix_mem = bus_alloc_resource_any(dev,
 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
 	}
 	if (adapter->msix_mem == NULL) {
 		/* May not be enabled */
 		device_printf(dev, "Unable to map MSIX table \n");
 		goto msi;
 	}
 
 	/* Auto-config starting point: min(CPUs, vectors minus link) */
 	queues = mp_ncpus;
 	if (queues > (msgs - 1))
 		queues = msgs - 1;
 
 #ifdef	RSS
 	/* With RSS the queue count is clamped to the bucket count */
 	if (queues > rss_getnumbuckets())
 		queues = rss_getnumbuckets();
 #endif
 
 	if (ixgbe_num_queues != 0) {
 		/* An explicit setting always wins */
 		queues = ixgbe_num_queues;
 	} else if (queues > 8) {
 		/* Autoconfiguration is capped at 8 queues */
 		queues = 8;
 	}
 
 	/* Make the sysctl show the value actually in use */
 	ixgbe_num_queues = queues;
 
 	/*
 	** One vector (RX/TX pair) per queue is needed,
 	** plus one more for Link.
 	*/
 	want = queues + 1;
 	if (msgs < want) {
 		device_printf(dev,
 		    "MSIX Configuration Problem, "
 		    "%d vectors but %d queues wanted!\n",
 		    msgs, want);
 		goto msi;
 	}
 	msgs = want;
 
 	if ((pci_alloc_msix(dev, &msgs) != 0) || (msgs != want)) {
 		/*
 		** The MSIX alloc failed or came up short:
 		** free it and drop down to MSI.
 		*/
 		pci_release_msi(dev);
 		goto msi;
 	}
 
 	device_printf(dev,
 	    "Using MSIX interrupts with %d vectors\n", msgs);
 	adapter->num_queues = queues;
 	return (msgs);
 
 msi:
 	/* rid is only read here when the MSI-X table had been mapped */
 	if (adapter->msix_mem != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    rid, adapter->msix_mem);
 		adapter->msix_mem = NULL;
 	}
 
 	msgs = 1;
 	if (pci_alloc_msi(dev, &msgs) != 0) {
 		device_printf(dev, "Using a Legacy interrupt\n");
 		return (0);
 	}
 
 	device_printf(dev, "Using an MSI interrupt\n");
 	return (msgs);
 }
 
 
 static int
 ixgbe_allocate_pci_resources(struct adapter *adapter)
 {
 	int             rid;
 	device_t        dev = adapter->dev;
 
 	rid = PCIR_BAR(0);
 	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 	    &rid, RF_ACTIVE);
 
 	if (!(adapter->pci_mem)) {
 		device_printf(dev, "Unable to allocate bus resource: memory\n");
 		return (ENXIO);
 	}
 
 	/* Save bus_space values for READ/WRITE_REG macros */
 	adapter->osdep.mem_bus_space_tag =
 		rman_get_bustag(adapter->pci_mem);
 	adapter->osdep.mem_bus_space_handle =
 		rman_get_bushandle(adapter->pci_mem);
 	/* Set hw values for shared code */
 	adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
 	adapter->hw.back = adapter;
 
 	/* Default to 1 queue if MSI-X setup fails */
 	adapter->num_queues = 1;
 
 	/*
 	** Now setup MSI or MSI-X, should
 	** return us the number of supported
 	** vectors. (Will be 1 for MSI)
 	*/
 	adapter->msix = ixgbe_setup_msix(adapter);
 	return (0);
 }
 
 /*
  * Undo ixgbe_allocate_pci_resources() and the interrupt setup:
  * tear down and release the per-queue and link/legacy IRQs, release
  * the MSI/MSI-X messages, then unmap the MSI-X table BAR and BAR 0.
  */
 static void
 ixgbe_free_pci_resources(struct adapter * adapter)
 {
 	device_t	dev = adapter->dev;
 	int		irq_rid, msix_bar_rid;
 
 	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
 		msix_bar_rid = PCIR_BAR(MSIX_82598_BAR);
 	else
 		msix_bar_rid = PCIR_BAR(MSIX_82599_BAR);
 
 	/*
 	** Attach can fail before the interrupt resources were set
 	** up; in that case the IRQ teardown must be skipped
 	** entirely.  A NULL adapter->res is the reliable indicator.
 	*/
 	if (adapter->res != NULL) {
 		/* Per-queue MSI-X vectors first */
 		for (int i = 0; i < adapter->num_queues; i++) {
 			struct ix_queue *q = &adapter->queues[i];
 
 			irq_rid = q->msix + 1;
 			if (q->tag != NULL) {
 				bus_teardown_intr(dev, q->res, q->tag);
 				q->tag = NULL;
 			}
 			if (q->res != NULL)
 				bus_release_resource(dev, SYS_RES_IRQ,
 				    irq_rid, q->res);
 		}
 
 		/* The Legacy or Link interrupt goes last */
 		if (adapter->vector)		/* MSI-X in use */
 			irq_rid = adapter->vector + 1;
 		else if (adapter->msix != 0)
 			irq_rid = 1;
 		else
 			irq_rid = 0;
 
 		if (adapter->tag != NULL) {
 			bus_teardown_intr(dev, adapter->res, adapter->tag);
 			adapter->tag = NULL;
 		}
 		/* adapter->res is known to be non-NULL in this branch */
 		bus_release_resource(dev, SYS_RES_IRQ, irq_rid, adapter->res);
 	}
 
 	if (adapter->msix)
 		pci_release_msi(dev);
 
 	if (adapter->msix_mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    msix_bar_rid, adapter->msix_mem);
 
 	if (adapter->pci_mem != NULL)
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    PCIR_BAR(0), adapter->pci_mem);
 }
 
 /*********************************************************************
  *
  *  Setup networking device structure and register an interface.
  *
  *  Allocates the ifnet, fills in its entry points and capabilities,
  *  attaches it as an Ethernet interface and registers the supported
  *  media types.  Returns 0 on success or -1 if the ifnet cannot be
  *  allocated.
  *
  **********************************************************************/
 static int
 ixgbe_setup_interface(device_t dev, struct adapter *adapter)
 {
 	struct ifnet   *ifp;
 
 	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
 
 	ifp = adapter->ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		device_printf(dev, "can not allocate ifnet structure\n");
 		return (-1);
 	}
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	ifp->if_baudrate = IF_Gbps(10);
 	ifp->if_init = ixgbe_init;
 	ifp->if_softc = adapter;
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = ixgbe_ioctl;
 #if __FreeBSD_version >= 1100036
 	if_setgetcounterfn(ifp, ixgbe_get_counter);
 #endif
 #if __FreeBSD_version >= 1100045
 	/* TSO parameters */
 	ifp->if_hw_tsomax = 65518;
 	ifp->if_hw_tsomaxsegcount = IXGBE_82599_SCATTER;
 	ifp->if_hw_tsomaxsegsize = 2048;
 #endif
 	/* Multiqueue transmit unless the legacy if_start path is built */
 #ifndef IXGBE_LEGACY_TX
 	ifp->if_transmit = ixgbe_mq_start;
 	ifp->if_qflush = ixgbe_qflush;
 #else
 	ifp->if_start = ixgbe_start;
 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
 	IFQ_SET_READY(&ifp->if_snd);
 #endif
 
 	ether_ifattach(ifp, adapter->hw.mac.addr);
 
 	/* Frame size derived from the MTU in effect after attach */
 	adapter->max_frame_size =
 	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
 
 	/*
 	 * Tell the upper layer(s) we support long frames.
 	 */
 	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
 
 	/* Set capability flags */
 	ifp->if_capabilities |= IFCAP_RXCSUM
 			     |  IFCAP_TXCSUM
 			     |  IFCAP_RXCSUM_IPV6
 			     |  IFCAP_TXCSUM_IPV6
 			     |  IFCAP_TSO4
 			     |  IFCAP_TSO6
 			     |  IFCAP_LRO
 			     |  IFCAP_VLAN_HWTAGGING
 			     |  IFCAP_VLAN_HWTSO
 			     |  IFCAP_VLAN_HWCSUM
 			     |  IFCAP_JUMBO_MTU
 			     |  IFCAP_VLAN_MTU
 			     |  IFCAP_HWSTATS;
 
 	/* Enable the above capabilities by default */
 	ifp->if_capenable = ifp->if_capabilities;
 
 	/*
 	** Don't turn this on by default, if vlans are
 	** created on another pseudo device (eg. lagg)
 	** then vlan events are not passed thru, breaking
 	** operation, but with HW FILTER off it works. If
 	** using vlans directly on the ixgbe driver you can
 	** enable this and get full hardware tag filtering.
 	**
 	** Because it is added after if_capenable was copied
 	** above, the capability is advertised but not enabled.
 	*/
 	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
 
 	/*
 	 * Specify the media types supported by this adapter and register
 	 * callbacks to update media and link information
 	 */
 	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
 		    ixgbe_media_status);
 
 	adapter->phy_layer = ixgbe_get_supported_physical_layer(&adapter->hw);
 	ixgbe_add_media_types(adapter);
 
 	/* Set autoselect media by default */
 	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
 
 	return (0);
 }
 
 /*
  * Register one ifmedia entry for every physical layer flagged in
  * adapter->phy_layer, plus the autoselect entry.  Layers that have
  * no matching media define are either mapped to a near equivalent
  * or only reported on the console.
  */
 static void
 ixgbe_add_media_types(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	device_t	dev = adapter->dev;
 	int		layer = adapter->phy_layer;
 
 	/* Layers that map straight onto FreeBSD media defines */
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_T, 0, NULL);
 	}
 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL);
 	}
 	if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL);
 	}
 
 	/* Passive and active direct-attach both show up as twinax */
 	if (layer & (IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU |
 	    IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA)) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL);
 	}
 
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_LR, 0, NULL);
 		if (hw->phy.multispeed_fiber) {
 			ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_LX, 0, NULL);
 		}
 	}
 
 	/* 1000base-SX is added on its own only when 10G-SR is absent */
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
 		if (hw->phy.multispeed_fiber) {
 			ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
 		}
 	} else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
 	}
 
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL);
 	}
 
 #ifdef IFM_ETH_XTYPE
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KR, 0, NULL);
 	}
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KX4, 0, NULL);
 	}
 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_KX, 0, NULL);
 	}
 #else
 	/* No backplane media defines here: map to the nearest type */
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) {
 		device_printf(dev, "Media supported: 10GbaseKR\n");
 		device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n");
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL);
 	}
 	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) {
 		device_printf(dev, "Media supported: 10GbaseKX4\n");
 		device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n");
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL);
 	}
 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) {
 		device_printf(dev, "Media supported: 1000baseKX\n");
 		device_printf(dev, "1000baseKX mapped to 1000baseCX\n");
 		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL);
 	}
 #endif
 	/* 1000base-BX is only reported; no media entry is added */
 	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) {
 		device_printf(dev, "Media supported: 1000baseBX\n");
 	}
 
 	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
 		ifmedia_add(&adapter->media,
 		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
 		ifmedia_add(&adapter->media,
 		    IFM_ETHER | IFM_1000_T, 0, NULL);
 	}
 
 	/* Autoselect is always offered */
 	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
 }
 
 /*
  * Kick off link configuration.
  *
  * For SFP ports the work is deferred by enqueueing mod_task on the
  * adapter taskqueue.  For other ports the current link state is
  * read, the advertised speeds are taken from
  * hw->phy.autoneg_advertised (or from get_link_capabilities when
  * nothing is advertised), and setup_link is invoked.  Errors from
  * the shared code end the sequence early; nothing is returned to
  * the caller.
  */
 static void
 ixgbe_config_link(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32	autoneg, err = 0;
 	bool	sfp, negotiate;
 
 	sfp = ixgbe_is_sfp(hw);
 
 	if (sfp) { 
-		if (hw->phy.multispeed_fiber) {
-			hw->mac.ops.setup_sfp(hw);
-			ixgbe_enable_tx_laser(hw);
-			taskqueue_enqueue(adapter->tq, &adapter->msf_task);
-		} else
-			taskqueue_enqueue(adapter->tq, &adapter->mod_task);
+		taskqueue_enqueue(adapter->tq, &adapter->mod_task);
 	} else {
 		/* Each shared-code op is optional; skip it when unset */
 		if (hw->mac.ops.check_link)
 			err = ixgbe_check_link(hw, &adapter->link_speed,
 			    &adapter->link_up, FALSE);
 		if (err)
 			goto out;
 		autoneg = hw->phy.autoneg_advertised;
 		/* Nothing advertised: ask the MAC what it is capable of */
 		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
                 	err  = hw->mac.ops.get_link_capabilities(hw,
 			    &autoneg, &negotiate);
 		if (err)
 			goto out;
 		if (hw->mac.ops.setup_link)
                 	err = hw->mac.ops.setup_link(hw,
 			    autoneg, adapter->link_up);
 	}
 out:
 	return;
 }
 
 
 /*********************************************************************
  *
  *  Enable transmit units.
  *
  *  Programs the descriptor ring base, length and head/tail registers
  *  of every TX queue, clears the descriptor write-back relaxed
  *  ordering bit, and on MACs other than 82598 enables TX DMA and
  *  programs MTQC with the arbiter temporarily disabled.
  *
  **********************************************************************/
 static void
 ixgbe_initialize_transmit_units(struct adapter *adapter)
 {
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct ixgbe_hw	*hw = &adapter->hw;
 
 	/* Setup the Base and Length of the Tx Descriptor Ring */
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		u64	tdba = txr->txdma.dma_paddr;
 		u32	txctrl = 0;
 		int	j = txr->me;	/* register index for this ring */
 
 		/* The 64-bit ring address is split over two registers */
 		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j),
 		       (tdba & 0x00000000ffffffffULL));
 		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32));
 		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j),
 		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
 
 		/* Setup the HW Tx Head and Tail descriptor pointers */
 		IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0);
 		IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0);
 
 		/* Cache the tail address */
 		txr->tail = IXGBE_TDT(j);
 
 		/* Disable Head Writeback */
 		/*
 		 * Note: for X550 series devices, these registers are actually
 		 * prefixed with TPH_ instead of DCA_, but the addresses and
 		 * fields remain the same.
 		 */
 		/* Read-modify-write; 82598 uses a different register */
 		switch (hw->mac.type) {
 		case ixgbe_mac_82598EB:
 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j));
 			break;
 		default:
 			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j));
 			break;
                 }
 		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
 		switch (hw->mac.type) {
 		case ixgbe_mac_82598EB:
 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl);
 			break;
 		default:
 			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl);
 			break;
 		}
 
 	}
 
 	if (hw->mac.type != ixgbe_mac_82598EB) {
 		u32 dmatxctl, rttdcs;
 #ifdef PCI_IOV
 		enum ixgbe_iov_mode mode = ixgbe_get_iov_mode(adapter);
 #endif
 		/* Turn on the TX DMA engine */
 		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
 		dmatxctl |= IXGBE_DMATXCTL_TE;
 		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
 		/* Disable arbiter to set MTQC */
 		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
 		rttdcs |= IXGBE_RTTDCS_ARBDIS;
 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
 #ifdef PCI_IOV
 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, ixgbe_get_mtqc(mode));
 #else
 		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
 #endif
 		/* MTQC is written; re-enable the arbiter */
 		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
 		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
 	}
 
 	return;
 }
 
 /*
  * Program the RSS state of the adapter: the redirection table
  * (RETA, and ERETA for entries 128 and up), the ten 32-bit hash key
  * registers, and MRQC with the set of packet types to hash on.
  */
 static void
 ixgbe_initialize_rss_mapping(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	u32 reta = 0, mrqc, rss_key[10];
 	int queue_id, table_size, index_mult;
 #ifdef	RSS
 	u32 rss_hash_config;
 #endif
 #ifdef PCI_IOV
 	enum ixgbe_iov_mode mode;
 #endif
 
 #ifdef	RSS
 	/* Fetch the configured RSS key */
 	rss_getkey((uint8_t *) &rss_key);
 #else
 	/* set up random bits */
 	arc4rand(&rss_key, sizeof(rss_key), 0);
 #endif
 
 	/* Set multiplier for RETA setup and table size based on MAC */
 	index_mult = 0x1;
 	table_size = 128;
 	switch (adapter->hw.mac.type) {
 	case ixgbe_mac_82598EB:
 		/*
 		 * NOTE(review): 0x11 presumably repeats the queue index
 		 * in both nibbles of the entry -- confirm against the
 		 * 82598 datasheet.
 		 */
 		index_mult = 0x11;
 		break;
 	case ixgbe_mac_X550:
 	case ixgbe_mac_X550EM_x:
 		table_size = 512;
 		break;
 	default:
 		break;
 	}
 
 	/* Set up the redirection table */
 	for (int i = 0, j = 0; i < table_size; i++, j++) {
 		/* j cycles round-robin over the configured queues */
 		if (j == adapter->num_queues) j = 0;
 #ifdef	RSS
 		/*
 		 * Fetch the RSS bucket id for the given indirection entry.
 		 * Cap it at the number of configured buckets (which is
 		 * num_queues.)
 		 */
 		queue_id = rss_get_indirection_to_bucket(i);
 		queue_id = queue_id % adapter->num_queues;
 #else
 		queue_id = (j * index_mult);
 #endif
 		/*
 		 * The low 8 bits are for hash value (n+0);
 		 * The next 8 bits are for hash value (n+1), etc.
 		 *
 		 * Each entry is shifted in from the top, so after four
 		 * iterations the register image is complete and is
 		 * written out.
 		 */
 		reta = reta >> 8;
 		reta = reta | ( ((uint32_t) queue_id) << 24);
 		if ((i & 3) == 3) {
 			/* First 128 entries go to RETA, the rest to ERETA */
 			if (i < 128)
 				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
 			else
 				IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta);
 			reta = 0;
 		}
 	}
 
 	/* Now fill our hash function seeds */
 	for (int i = 0; i < 10; i++)
 		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]);
 
 	/* Perform hash on these packet types */
 #ifdef	RSS
 	/* Translate the stack's RSS hash configuration into MRQC bits */
 	mrqc = IXGBE_MRQC_RSSEN;
 	rss_hash_config = rss_gethashconfig();
 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
 	/* This hash type has no MRQC bit here; it is only reported */
 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX)
 		device_printf(adapter->dev,
 		    "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, "
 		    "but not supported\n", __func__);
 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
 	if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX)
 		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
 #else
 	/*
 	 * Disable UDP - IP fragments aren't currently being handled
 	 * and so we end up with a mix of 2-tuple and 4-tuple
 	 * traffic.
 	 */
 	mrqc = IXGBE_MRQC_RSSEN
 	     | IXGBE_MRQC_RSS_FIELD_IPV4
 	     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
 	     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
 	     | IXGBE_MRQC_RSS_FIELD_IPV6
 	     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
 	;
 #endif /* RSS */
 #ifdef PCI_IOV
 	/* Merge in the bits required by the current IOV mode */
 	mode = ixgbe_get_iov_mode(adapter);
 	mrqc |= ixgbe_get_mrqc(mode);
 #endif
 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
 }
 
 
 /*********************************************************************
  *
  *  Setup receive registers and features.
  *
  *  With receives disabled, this programs broadcast acceptance, jumbo
  *  frame enable, the descriptor ring and SRRCTL of every RX queue,
  *  the RSS mapping and finally the RX checksum control register.
  *
  **********************************************************************/
 /* NOTE(review): not referenced by the function below */
 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
 
 /* Added before shifting so the buffer size rounds up, not down */
 #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
 	
 static void
 ixgbe_initialize_receive_units(struct adapter *adapter)
 {
 	struct	rx_ring	*rxr = adapter->rx_rings;
 	struct ixgbe_hw	*hw = &adapter->hw;
 	struct ifnet   *ifp = adapter->ifp;
 	u32		bufsz, fctrl, srrctl, rxcsum;
 	u32		hlreg;
 
 	/*
 	 * Make sure receives are disabled while
 	 * setting up the descriptor ring
 	 */
 	ixgbe_disable_rx(hw);
 
 	/* Enable broadcasts */
 	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
 	fctrl |= IXGBE_FCTRL_BAM;
 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
 		fctrl |= IXGBE_FCTRL_DPF;
 		fctrl |= IXGBE_FCTRL_PMCF;
 	}
 	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
 
 	/* Set for Jumbo Frames? */
 	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
 	if (ifp->if_mtu > ETHERMTU)
 		hlreg |= IXGBE_HLREG0_JUMBOEN;
 	else
 		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
 #ifdef DEV_NETMAP
 	/* crcstrip is conditional in netmap (in RDRXCTL too ?) */
 	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
 		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
 	else
 		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
 #endif /* DEV_NETMAP */
 	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
 
 	/* mbuf size expressed in SRRCTL packet-buffer units, rounded up */
 	bufsz = (adapter->rx_mbuf_sz +
 	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
 
 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
 		u64 rdba = rxr->rxdma.dma_paddr;
 		int j = rxr->me;	/* register index for this ring */
 
 		/* Setup the Base and Length of the Rx Descriptor Ring */
 		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j),
 			       (rdba & 0x00000000ffffffffULL));
 		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32));
 		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j),
 		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
 
 		/* Set up the SRRCTL register */
 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j));
 		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
 		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
 		srrctl |= bufsz;
 		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
 
 		/*
 		 * Set DROP_EN iff we have no flow control and >1 queue.
 		 * Note that srrctl was cleared shortly before during reset,
 		 * so we do not need to clear the bit, but do it just in case
 		 * this code is moved elsewhere.
 		 */
 		if (adapter->num_queues > 1 &&
 		    adapter->hw.fc.requested_mode == ixgbe_fc_none) {
 			srrctl |= IXGBE_SRRCTL_DROP_EN;
 		} else {
 			srrctl &= ~IXGBE_SRRCTL_DROP_EN;
 		}
 
 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(j), srrctl);
 
 		/* Setup the HW Rx Head and Tail Descriptor Pointers */
 		IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0);
 		IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0);
 
 		/* Set the driver rx tail address */
 		rxr->tail =  IXGBE_RDT(rxr->me);
 	}
 
 	/* MACs other than 82598: set the header types in PSRTYPE(0) */
 	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
 		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
 			      IXGBE_PSRTYPE_UDPHDR |
 			      IXGBE_PSRTYPE_IPV4HDR |
 			      IXGBE_PSRTYPE_IPV6HDR;
 		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
 	}
 
 	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
 
 	ixgbe_initialize_rss_mapping(adapter);
 
 	if (adapter->num_queues > 1) {
 		/* RSS and RX IPP Checksum are mutually exclusive */
 		rxcsum |= IXGBE_RXCSUM_PCSD;
 	}
 
 	/*
 	 * NOTE(review): enabling IFCAP_RXCSUM also sets PCSD, so IPPCSE
 	 * below is only set for a single queue with RXCSUM disabled --
 	 * confirm this is the intended combination.
 	 */
 	if (ifp->if_capenable & IFCAP_RXCSUM)
 		rxcsum |= IXGBE_RXCSUM_PCSD;
 
 	/* This is useful for calculating UDP/IP fragment checksums */
 	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
 		rxcsum |= IXGBE_RXCSUM_IPPCSE;
 
 	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
 
 	return;
 }
 
 
 /*
 ** VLAN config event handler.
 **
 ** Records the VLAN id in the soft copy of the VFTA (shadow_vfta),
 ** bumps the VLAN count and then calls ixgbe_setup_vlan_hw_support().
 ** Events for another interface (softc != arg) and VLAN ids outside
 ** 1..4095 are ignored.
 **
 **  - arg:  the softc this handler was registered with
 **  - ifp:  interface the event was raised for
 **  - vtag: VLAN id being configured
 */
 static void
 ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u16		index, bit;
 
 	if (ifp->if_softc !=  arg)   /* Not our event */
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
 		return;
 
 	IXGBE_CORE_LOCK(adapter);
 	/* 32 VLAN ids per shadow_vfta word: word index, then bit in word */
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
 	/* Unsigned constant: shifting a signed 1 by 31 is undefined */
 	adapter->shadow_vfta[index] |= (1U << bit);
 	++adapter->num_vlans;
 	ixgbe_setup_vlan_hw_support(adapter);
 	IXGBE_CORE_UNLOCK(adapter);
 }
 
 /*
 ** VLAN unconfig event handler.
 **
 ** Clears the VLAN id from the soft copy of the VFTA (shadow_vfta),
 ** drops the VLAN count and then calls ixgbe_setup_vlan_hw_support().
 ** Events for another interface (softc != arg) and VLAN ids outside
 ** 1..4095 are ignored.
 **
 **  - arg:  the softc this handler was registered with
 **  - ifp:  interface the event was raised for
 **  - vtag: VLAN id being removed
 */
 static void
 ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
 {
 	struct adapter	*adapter = ifp->if_softc;
 	u16		index, bit;
 
 	if (ifp->if_softc !=  arg)
 		return;
 
 	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
 		return;
 
 	IXGBE_CORE_LOCK(adapter);
 	/* 32 VLAN ids per shadow_vfta word: word index, then bit in word */
 	index = (vtag >> 5) & 0x7F;
 	bit = vtag & 0x1F;
 	/* Unsigned constant: shifting a signed 1 by 31 is undefined */
 	adapter->shadow_vfta[index] &= ~(1U << bit);
 	--adapter->num_vlans;
 	/* Re-init to load the changes */
 	ixgbe_setup_vlan_hw_support(adapter);
 	IXGBE_CORE_UNLOCK(adapter);
 }
 
 /*
 ** Push the software VLAN state into the hardware.
 **
 ** Does nothing when no VLANs are registered.  Otherwise it turns on
 ** tag stripping for every RX ring and, when IFCAP_VLAN_HWFILTER is
 ** enabled, reloads the VFTA from shadow_vfta and updates VLNCTRL.
 */
 static void
 ixgbe_setup_vlan_hw_support(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	struct ifnet	*ifp = adapter->ifp;
 	u32		reg;
 	int		q, idx;
 
 	/*
 	** We arrive here via init_locked, i.e. after a soft reset
 	** that already cleared the VFTA and related state; with no
 	** registered vlans there is nothing to restore.
 	*/
 	if (adapter->num_vlans == 0)
 		return;
 
 	/* Per-ring setup */
 	for (q = 0; q < adapter->num_queues; q++) {
 		struct rx_ring *ring = &adapter->rx_rings[q];
 
 		/* Everything but 82598 has a per-queue VME bit in RXDCTL */
 		if (hw->mac.type != ixgbe_mac_82598EB) {
 			reg = IXGBE_READ_REG(hw, IXGBE_RXDCTL(ring->me));
 			reg |= IXGBE_RXDCTL_VME;
 			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(ring->me), reg);
 		}
 		ring->vtag_strip = TRUE;
 	}
 
 	/* Nothing more to do unless hardware filtering is enabled */
 	if (!(ifp->if_capenable & IFCAP_VLAN_HWFILTER))
 		return;
 
 	/* The soft reset zeroed the VFTA: write back the non-zero words */
 	for (idx = 0; idx < IXGBE_VFTA_SIZE; idx++) {
 		if (adapter->shadow_vfta[idx] == 0)
 			continue;
 		IXGBE_WRITE_REG(hw, IXGBE_VFTA(idx),
 		    adapter->shadow_vfta[idx]);
 	}
 
 	reg = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
 	/* Turn the filter table on when the capability is enabled */
 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
 		reg &= ~IXGBE_VLNCTRL_CFIEN;
 		reg |= IXGBE_VLNCTRL_VFE;
 	}
 	/* 82598 sets VME in VLNCTRL instead of per queue */
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		reg |= IXGBE_VLNCTRL_VME;
 	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, reg);
 }
 
 /*
 ** Enable interrupts.
 **
 ** Builds the EIMS mask for the non-queue causes appropriate to the
 ** MAC type and writes it, programs EIAC when MSI-X memory is present,
 ** then enables each queue vector and flushes the writes.
 */
 static void
 ixgbe_enable_intr(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	struct ix_queue	*q;
 	u32		eims, fwsm;
 	int		n;
 
 	eims = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
 
 	/* Fan failure detection */
 	if (hw->device_id == IXGBE_DEV_ID_82598AT)
 		eims |= IXGBE_EIMS_GPI_SDP1;
 
 	switch (hw->mac.type) {
 	case ixgbe_mac_82599EB:
 		eims |= IXGBE_EIMS_ECC;
 		/* Temperature sensor on some adapters */
 		eims |= IXGBE_EIMS_GPI_SDP0;
 		/* SFP+ (RX_LOS_N & MOD_ABS_N) */
 		eims |= IXGBE_EIMS_GPI_SDP1;
 		eims |= IXGBE_EIMS_GPI_SDP2;
 #ifdef IXGBE_FDIR
 		eims |= IXGBE_EIMS_FLOW_DIR;
 #endif
 #ifdef PCI_IOV
 		eims |= IXGBE_EIMS_MAILBOX;
 #endif
 		break;
 
 	case ixgbe_mac_X540:
 		/* Thermal sensor interrupt only if firmware enabled it */
 		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
 			eims |= IXGBE_EIMS_TS;
 		eims |= IXGBE_EIMS_ECC;
 #ifdef IXGBE_FDIR
 		eims |= IXGBE_EIMS_FLOW_DIR;
 #endif
 		break;
 
 	case ixgbe_mac_X550:
 	case ixgbe_mac_X550EM_x:
 		/* MAC thermal sensor is automatically enabled */
 		eims |= IXGBE_EIMS_TS;
 		/* Some devices use SDP0 for important information */
 		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
 		    hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T)
 			eims |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw);
 		eims |= IXGBE_EIMS_ECC;
 #ifdef IXGBE_FDIR
 		eims |= IXGBE_EIMS_FLOW_DIR;
 #endif
 #ifdef PCI_IOV
 		eims |= IXGBE_EIMS_MAILBOX;
 #endif
 		break;
 
 	default:
 		break;
 	}
 
 	IXGBE_WRITE_REG(hw, IXGBE_EIMS, eims);
 
 	/* MSI-X: use auto clear, except for the link-related causes */
 	if (adapter->msix_mem) {
 		u32 eiac = IXGBE_EIMS_ENABLE_MASK;
 
 		eiac &= ~IXGBE_EIMS_OTHER;
 		eiac &= ~IXGBE_EIMS_LSC;
 #ifdef PCI_IOV
 		eiac &= ~IXGBE_EIMS_MAILBOX;
 #endif
 		IXGBE_WRITE_REG(hw, IXGBE_EIAC, eiac);
 	}
 
 	/*
 	** Queues are enabled one at a time so that the extended
 	** (beyond 32) MSIX vectors usable on 82599 are handled.
 	*/
 	for (n = 0, q = adapter->queues; n < adapter->num_queues; n++, q++)
 		ixgbe_enable_queue(adapter, q->msix);
 
 	IXGBE_WRITE_FLUSH(hw);
 }
 
 static void
 ixgbe_disable_intr(struct adapter *adapter)
 {
 	if (adapter->msix_mem)
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
 	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
 	} else {
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
 	}
 	IXGBE_WRITE_FLUSH(&adapter->hw);
 	return;
 }
 
 /*
 ** Get the width and transaction speed of
 ** the slot this adapter is plugged into.
 */
 static void
 ixgbe_get_slot_info(struct adapter *adapter)
 {
 	device_t		dev = adapter->dev;
 	struct ixgbe_hw		*hw = &adapter->hw;
 	struct ixgbe_mac_info	*mac = &hw->mac;
 	u16			link;
 	u32			offset;
 
 	/* For most devices simply call the shared code routine */
 	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
 		ixgbe_get_bus_info(hw);
 		/* These devices don't use PCI-E */
 		switch (hw->mac.type) {
 		case ixgbe_mac_X550EM_x:
 			return;
 		default:
 			goto display;
 		}
 	}
 
 	/*
 	** For the Quad port adapter we need to parse back
 	** up the PCI tree to find the speed of the expansion
 	** slot into which this adapter is plugged. A bit more work.
 	*/
 	dev = device_get_parent(device_get_parent(dev));
 #ifdef IXGBE_DEBUG
 	device_printf(dev, "parent pcib = %x,%x,%x\n",
 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
 #endif
 	dev = device_get_parent(device_get_parent(dev));
 #ifdef IXGBE_DEBUG
 	device_printf(dev, "slot pcib = %x,%x,%x\n",
 	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
 #endif
 	/* Now get the PCI Express Capabilities offset */
 	pci_find_cap(dev, PCIY_EXPRESS, &offset);
 	/* ...and read the Link Status Register */
 	link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
 	switch (link & IXGBE_PCI_LINK_WIDTH) {
 	case IXGBE_PCI_LINK_WIDTH_1:
 		hw->bus.width = ixgbe_bus_width_pcie_x1;
 		break;
 	case IXGBE_PCI_LINK_WIDTH_2:
 		hw->bus.width = ixgbe_bus_width_pcie_x2;
 		break;
 	case IXGBE_PCI_LINK_WIDTH_4:
 		hw->bus.width = ixgbe_bus_width_pcie_x4;
 		break;
 	case IXGBE_PCI_LINK_WIDTH_8:
 		hw->bus.width = ixgbe_bus_width_pcie_x8;
 		break;
 	default:
 		hw->bus.width = ixgbe_bus_width_unknown;
 		break;
 	}
 
 	switch (link & IXGBE_PCI_LINK_SPEED) {
 	case IXGBE_PCI_LINK_SPEED_2500:
 		hw->bus.speed = ixgbe_bus_speed_2500;
 		break;
 	case IXGBE_PCI_LINK_SPEED_5000:
 		hw->bus.speed = ixgbe_bus_speed_5000;
 		break;
 	case IXGBE_PCI_LINK_SPEED_8000:
 		hw->bus.speed = ixgbe_bus_speed_8000;
 		break;
 	default:
 		hw->bus.speed = ixgbe_bus_speed_unknown;
 		break;
 	}
 
 	mac->ops.set_lan_id(hw);
 
 display:
 	device_printf(dev,"PCI Express Bus: Speed %s %s\n",
 	    ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
 	    (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
 	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
 	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
 	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
 	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
 	    ("Unknown"));
 
 	if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
 	    (hw->bus.speed == ixgbe_bus_speed_2500))) {
 		device_printf(dev, "PCI-Express bandwidth available"
 		    " for this card\n     is not sufficient for"
 		    " optimal performance.\n");
 		device_printf(dev, "For optimal performance a x8 "
 		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
         }
 	if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
 	    ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
 	    (hw->bus.speed < ixgbe_bus_speed_8000))) {
 		device_printf(dev, "PCI-Express bandwidth available"
 		    " for this card\n     is not sufficient for"
 		    " optimal performance.\n");
 		device_printf(dev, "For optimal performance a x8 "
 		    "PCIE Gen3 slot is required.\n");
         }
 
 	return;
 }
 
 
 /*
 ** Setup the correct IVAR register for a particular MSIX interrupt
 **   (yes this is all very magic and confusing :)
 **  - entry is the register array entry
 **  - vector is the MSIX vector for this queue
 **  - type is RX/TX/MISC (0, 1 and -1 respectively)
 **
 ** Each IVAR register holds several 8-bit fields; the code does a
 ** read-modify-write of the one field selected by entry/type.  MAC
 ** types not listed in the switch are left untouched.
 */
 static void
 ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 ivar, index;
 
 	vector |= IXGBE_IVAR_ALLOC_VAL;
 
 	/*
 	 * The field masks and values below are built as u32: the shift
 	 * counts reach 24, and shifting a (promoted) signed int that far
 	 * would overflow into the sign bit.
 	 */
 	switch (hw->mac.type) {
 
 	case ixgbe_mac_82598EB:
 		if (type == -1)
 			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
 		else
 			entry += (type * 64);
 		/* Four 8-bit fields per register */
 		index = (entry >> 2) & 0x1F;
 		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
 		ivar &= ~((u32)0xFF << (8 * (entry & 0x3)));
 		ivar |= ((u32)vector << (8 * (entry & 0x3)));
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
 		break;
 
 	case ixgbe_mac_82599EB:
 	case ixgbe_mac_X540:
 	case ixgbe_mac_X550:
 	case ixgbe_mac_X550EM_x:
 		if (type == -1) { /* MISC IVAR */
 			index = (entry & 1) * 8;
 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
 			ivar &= ~((u32)0xFF << index);
 			ivar |= ((u32)vector << index);
 			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
 		} else {	/* RX/TX IVARS */
 			/* Bit offset of the field inside IVAR(entry / 2) */
 			index = (16 * (entry & 1)) + (8 * type);
 			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
 			ivar &= ~((u32)0xFF << index);
 			ivar |= ((u32)vector << index);
 			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
 		}
 		break;
 
 	default:
 		break;
 	}
 }
 
 /*
 ** Map every queue pair's RX and TX cause to that queue's MSIX
 ** vector, seed each vector's EITR with the initial throttle value,
 ** and finally map the link interrupt to adapter->vector.
 */
 static void
 ixgbe_configure_ivars(struct adapter *adapter)
 {
 	struct ix_queue	*q = adapter->queues;
 	u32		itr = 0;
 	int		n;
 
 	if (ixgbe_max_interrupt_rate > 0) {
 		itr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
 	} else {
 		/*
 		** No interrupt moderation: DMA coalescing is
 		** switched off as well.
 		*/
 		adapter->dmac = 0;
 	}
 
 	for (n = 0; n < adapter->num_queues; n++, q++) {
 		/* RX entry first, then TX */
 		ixgbe_set_ivar(adapter, adapter->rx_rings[n].me, q->msix, 0);
 		ixgbe_set_ivar(adapter, adapter->tx_rings[n].me, q->msix, 1);
 		/* Initial EITR value for this vector */
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(q->msix), itr);
 	}
 
 	/* Link interrupt */
 	ixgbe_set_ivar(adapter, 1, adapter->vector, -1);
 }
 
 /*
 ** ixgbe_sfp_probe - called in the local timer to
 ** determine if a port had optics inserted.
 **
 ** Only acts when the PHY type is ixgbe_phy_nl and no SFP is currently
 ** recorded as present.  Returns TRUE once a supported module has been
 ** identified and the PHY reset; FALSE otherwise.  adapter->sfp_probe
 ** is cleared both on success and when the module is unsupported.
 */
 static bool
 ixgbe_sfp_probe(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	device_t	dev = adapter->dev;
 	bool		result = FALSE;
 
 	if ((hw->phy.type == ixgbe_phy_nl) &&
 	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
 		s32 ret = hw->phy.ops.identify_sfp(hw);
 		if (ret)
 			goto out;
 		ret = hw->phy.ops.reset(hw);
 		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
 			/*
 			 * Each message ends in a newline so the two
 			 * device_printf() outputs land on separate lines.
 			 */
 			device_printf(dev, "Unsupported SFP+ module detected!\n");
 			device_printf(dev, "Reload driver with supported module.\n");
 			adapter->sfp_probe = FALSE;
 			goto out;
 		} else
 			device_printf(dev, "SFP+ module detected!\n");
 		/* We now have supported optics */
 		adapter->sfp_probe = FALSE;
 		/* Set the optics type so system reports correctly */
 		ixgbe_setup_optics(adapter);
 		result = TRUE;
 	}
 out:
 	return (result);
 }
 
 /*
 ** Deferred handler for MSIX link interrupts
 **  - runs outside interrupt context since it might sleep
 **  - refreshes link speed/state, reports it, then re-arms
 **    the link status change interrupt
 */
 static void
 ixgbe_handle_link(void *context, int pending)
 {
 	struct adapter *sc = context;
 
 	ixgbe_check_link(&sc->hw, &sc->link_speed, &sc->link_up, 0);
 	ixgbe_update_link_status(sc);
 
 	/* Link interrupts back on */
 	IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, IXGBE_EIMS_LSC);
 }
 
 /*
 ** Tasklet for handling SFP module interrupts
 **
 ** Identifies the SFP module and runs the MAC's setup_sfp op.  If
 ** either step reports IXGBE_ERR_SFP_NOT_SUPPORTED, a message is
 ** printed and the MSF task is not queued.
 **  - context is the adapter softc
 **  - pending is unused
 */
 static void
 ixgbe_handle_mod(void *context, int pending)
 {
 	struct adapter  *adapter = context;
 	struct ixgbe_hw *hw = &adapter->hw;
+	enum ixgbe_phy_type orig_type = hw->phy.type;
 	device_t	dev = adapter->dev;
 	u32 err;
 
+	IXGBE_CORE_LOCK(adapter);
+
+	/* Check to see if the PHY type changed */
+	if (hw->phy.ops.identify) {
+		hw->phy.type = ixgbe_phy_unknown;
+		hw->phy.ops.identify(hw);
+	}
+
+	if (hw->phy.type != orig_type) {
+		device_printf(dev, "Detected phy_type %d\n", hw->phy.type);
+
+		if (hw->phy.type == ixgbe_phy_none) {
+			hw->phy.sfp_type = ixgbe_sfp_type_unknown;
+			goto out;
+		}
+
+		/* Try to do the initialization that was skipped before */
+		if (hw->phy.ops.init)
+			hw->phy.ops.init(hw);
+		if (hw->phy.ops.reset)
+			hw->phy.ops.reset(hw);
+	}
+
 	err = hw->phy.ops.identify_sfp(hw);
 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
 		device_printf(dev,
 		    "Unsupported SFP+ module type was detected.\n");
-		return;
+		goto out;
 	}
 
 	err = hw->mac.ops.setup_sfp(hw);
 	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
 		device_printf(dev,
 		    "Setup failure - unsupported SFP+ module type.\n");
-		return;
+		goto out;
 	}
-	taskqueue_enqueue(adapter->tq, &adapter->msf_task);
+	if (hw->phy.multispeed_fiber)
+		taskqueue_enqueue(adapter->tq, &adapter->msf_task);
+out:
+	/* Update media type */
+	switch (hw->mac.ops.get_media_type(hw)) {
+		case ixgbe_media_type_fiber:
+			adapter->optics = IFM_10G_SR;
+			break;
+		case ixgbe_media_type_copper:
+			adapter->optics = IFM_10G_TWINAX;
+			break;
+		case ixgbe_media_type_cx4:
+			adapter->optics = IFM_10G_CX4;
+			break;
+		default:
+			adapter->optics = 0;
+			break;
+	}
+
+	IXGBE_CORE_UNLOCK(adapter);
 	return;
 }
 
 
 /*
 ** Tasklet for handling MSF (multispeed fiber) interrupts
 **
 ** Refreshes adapter->phy_layer, sets up the link using the advertised
 ** autoneg mask (or, when none is advertised, the MAC's reported link
 ** capabilities), then rebuilds the ifmedia list.
 **  - context is the adapter softc
 **  - pending is unused
 */
 static void
 ixgbe_handle_msf(void *context, int pending)
 {
 	struct adapter  *adapter = context;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 autoneg;
 	bool negotiate;
 
+	IXGBE_CORE_LOCK(adapter);
 	/* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */
 	adapter->phy_layer = ixgbe_get_supported_physical_layer(hw);
 
 	autoneg = hw->phy.autoneg_advertised;
 	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
 		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
 	if (hw->mac.ops.setup_link)
 		hw->mac.ops.setup_link(hw, autoneg, TRUE);
 
 	/* Adjust media types shown in ifconfig */
 	ifmedia_removeall(&adapter->media);
 	ixgbe_add_media_types(adapter);
+	IXGBE_CORE_UNLOCK(adapter);
 	return;
 }
 
 /*
 ** Deferred handler for interrupts raised by an external PHY:
 ** runs the PHY's LASI handler and reports any error it returns.
 */
 static void
 ixgbe_handle_phy(void *context, int pending)
 {
 	struct adapter	*sc = context;
 	int		rc;
 
 	rc = sc->hw.phy.ops.handle_lasi(&sc->hw);
 
 	/* Over-temperature gets its own, louder, message */
 	if (rc == IXGBE_ERR_OVERTEMP) {
 		device_printf(sc->dev,
 		    "CRITICAL: EXTERNAL PHY OVER TEMP!! "
 		    " PHY will downshift to lower power state!\n");
 		return;
 	}
 
 	if (rc != 0)
 		device_printf(sc->dev,
 		    "Error handling LASI interrupt: %d\n", rc);
 }
 
 #ifdef IXGBE_FDIR
 /*
 ** Deferred handler that rebuilds the Flow Director filter table.
 ** Acts only while adapter->fdir_reinit is 1; afterwards the flag is
 ** cleared, the flow director interrupt is re-enabled and the
 ** interface is marked running again.
 */
 static void
 ixgbe_reinit_fdir(void *context, int pending)
 {
 	struct adapter	*sc = context;
 	struct ifnet	*ifn = sc->ifp;
 
 	/* Any other value is unexpected; leave everything alone */
 	if (sc->fdir_reinit == 1) {
 		ixgbe_reinit_fdir_tables_82599(&sc->hw);
 		sc->fdir_reinit = 0;
 		/* Flow director interrupts back on */
 		IXGBE_WRITE_REG(&sc->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
 		/* Restart the interface */
 		ifn->if_drv_flags |= IFF_DRV_RUNNING;
 	}
 }
 #endif
 
 /*********************************************************************
  *
  *  Configure DMA Coalescing
  *
  *  Skipped on MACs older than X550 or without a dmac_config op.
  *  The hardware is only reprogrammed when the watchdog timer or the
  *  link speed differs from the cached configuration.
  *
  **********************************************************************/
 static void
 ixgbe_config_dmac(struct adapter *adapter)
 {
 	struct ixgbe_hw			*hw = &adapter->hw;
 	struct ixgbe_dmac_config	*cfg = &hw->mac.dmac_config;
 
 	if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config)
 		return;
 
 	/* Cached settings already current: nothing to do */
 	if (cfg->watchdog_timer == adapter->dmac &&
 	    cfg->link_speed == adapter->link_speed)
 		return;
 
 	cfg->watchdog_timer = adapter->dmac;
 	cfg->fcoe_en = false;
 	cfg->link_speed = adapter->link_speed;
 	cfg->num_tcs = 1;
 
 	INIT_DEBUGOUT2("dmac settings: watchdog %d, link speed %d\n",
 	    cfg->watchdog_timer, cfg->link_speed);
 
 	hw->mac.ops.dmac_config(hw);
 }
 
 /*
  * Read the device capabilities and decide whether this port can do
  * Wake On LAN.
  *
  * adapter->wol_support and hw->wol_enabled are set to 1 when the
  * capabilities advertise WoL for both ports, or for port 0 only and
  * this is bus function 0; otherwise both are 0.  The current WUFC
  * register value is saved in adapter->wufc.
  */
 static void
 ixgbe_check_wol_support(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	u16		caps = 0;
 
 	/* Assume no WoL until the capabilities say otherwise */
 	hw->wol_enabled = 0;
 	adapter->wol_support = hw->wol_enabled;
 
 	ixgbe_get_device_caps(hw, &caps);
 
 	if (caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) {
 		adapter->wol_support = hw->wol_enabled = 1;
 	} else if ((caps & IXGBE_DEVICE_CAPS_WOL_PORT0) &&
 	    hw->bus.func == 0) {
 		adapter->wol_support = hw->wol_enabled = 1;
 	}
 
 	/* Remember the initial wake up filter configuration */
 	adapter->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC);
 }
 
 /*
  * Prepare the adapter/port for LPLU and/or WoL.
  *
  * Must be called with the core mutex held.  Only the X550EM 10G-T
  * device with an enter_lplu op goes through the wake-up programming
  * and LPLU entry; every other adapter is simply stopped.
  *
  * Returns 0, or the error reported by enter_lplu.
  */
 static int
 ixgbe_setup_low_power_mode(struct adapter *adapter)
 {
 	struct ixgbe_hw	*hw = &adapter->hw;
 	device_t	dev = adapter->dev;
 	s32		rc = 0;
 
 	mtx_assert(&adapter->core_mtx, MA_OWNED);
 
 	/* Power management flow is limited to X550EM baseT */
 	if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T ||
 	    !hw->phy.ops.enter_lplu) {
 		/* Everyone else just stops */
 		ixgbe_stop(adapter);
 		return rc;
 	}
 
 	/* APM wakeup support off (ACPI is used instead) */
 	IXGBE_WRITE_REG(hw, IXGBE_GRC,
 	    IXGBE_READ_REG(hw, IXGBE_GRC) & ~(u32)2);
 
 	/*
 	 * Wipe the Wake Up Status register so that stale wakeup
 	 * events cannot wake us right after suspending.
 	 */
 	IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff);
 
 	/* Load the user's filter settings into Wakeup Filter Control */
 	IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wufc);
 
 	/* Wakeups and power management on, in Wakeup Control */
 	IXGBE_WRITE_REG(hw, IXGBE_WUC,
 	    IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN);
 
 	/* Special LPLU flow: stop with PHY reset disabled, then enter */
 	hw->phy.reset_disable = true;
 	ixgbe_stop(adapter);
 	rc = hw->phy.ops.enter_lplu(hw);
 	if (rc)
 		device_printf(dev,
 		    "Error entering LPLU: %d\n", rc);
 	hw->phy.reset_disable = false;
 
 	return rc;
 }
 
 /**********************************************************************
  *
  *  Update the board statistics counters.
  *
  *  Reads the hardware statistics registers, accumulates them into
  *  adapter->stats.pf, applies a few hardware workarounds and then
  *  publishes the totals through the IXGBE_SET_* macros.
  *
  **********************************************************************/
 static void
 ixgbe_update_stats_counters(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	/*
 	 * NOTE(review): missed_rx and total_missed_rx are never modified
 	 * after being initialized to 0 in this function.
 	 */
 	u32 missed_rx = 0, bprc, lxon, lxoff, total;
 	u64 total_missed_rx = 0;
 
 	adapter->stats.pf.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
 	adapter->stats.pf.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
 	adapter->stats.pf.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
 	adapter->stats.pf.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
 
 	/* Per-queue counters: only the first 16 are collected */
 	for (int i = 0; i < 16; i++) {
 		adapter->stats.pf.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
 		adapter->stats.pf.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
 		adapter->stats.pf.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
 	}
 	adapter->stats.pf.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
 	adapter->stats.pf.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
 	adapter->stats.pf.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
 
 	/* Hardware workaround, gprc counts missed packets */
 	adapter->stats.pf.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
 	/* missed_rx is always 0 here, so this subtraction has no effect */
 	adapter->stats.pf.gprc -= missed_rx;
 
 	if (hw->mac.type != ixgbe_mac_82598EB) {
 		/* Octet counters are split across low/high registers */
 		adapter->stats.pf.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
 		adapter->stats.pf.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
 		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
 		adapter->stats.pf.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
 		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
 		adapter->stats.pf.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
 		adapter->stats.pf.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
 	} else {
 		adapter->stats.pf.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
 		adapter->stats.pf.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
 		/* 82598 only has a counter in the high register */
 		adapter->stats.pf.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
 		adapter->stats.pf.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
 		adapter->stats.pf.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
 	}
 
 	/*
 	 * Workaround: mprc hardware is incorrectly counting
 	 * broadcasts, so for now we subtract those.
 	 */
 	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
 	adapter->stats.pf.bprc += bprc;
 	adapter->stats.pf.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
 	/* The subtraction is only applied on 82598 */
 	if (hw->mac.type == ixgbe_mac_82598EB)
 		adapter->stats.pf.mprc -= bprc;
 
 	adapter->stats.pf.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
 	adapter->stats.pf.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
 	adapter->stats.pf.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
 	adapter->stats.pf.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
 	adapter->stats.pf.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
 	adapter->stats.pf.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
 
 	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
 	adapter->stats.pf.lxontxc += lxon;
 	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
 	adapter->stats.pf.lxofftxc += lxoff;
 	/* XON + XOFF frames sent; removed from the TX totals below */
 	total = lxon + lxoff;
 
 	adapter->stats.pf.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
 	adapter->stats.pf.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
 	adapter->stats.pf.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
 	adapter->stats.pf.gptc -= total;
 	adapter->stats.pf.mptc -= total;
 	adapter->stats.pf.ptc64 -= total;
 	adapter->stats.pf.gotc -= total * ETHER_MIN_LEN;
 
 	adapter->stats.pf.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
 	adapter->stats.pf.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
 	adapter->stats.pf.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
 	adapter->stats.pf.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
 	adapter->stats.pf.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
 	adapter->stats.pf.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
 	adapter->stats.pf.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
 	adapter->stats.pf.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
 	adapter->stats.pf.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
 	adapter->stats.pf.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
 	adapter->stats.pf.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
 	adapter->stats.pf.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
 	adapter->stats.pf.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
 	adapter->stats.pf.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
 	adapter->stats.pf.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
 	adapter->stats.pf.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
 	adapter->stats.pf.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
 	adapter->stats.pf.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
 	/* Only read FCOE on 82599 */
 	if (hw->mac.type != ixgbe_mac_82598EB) {
 		adapter->stats.pf.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
 		adapter->stats.pf.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
 		adapter->stats.pf.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
 		adapter->stats.pf.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
 		adapter->stats.pf.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
 	}
 
 	/* Fill out the OS statistics structure */
 	IXGBE_SET_IPACKETS(adapter, adapter->stats.pf.gprc);
 	IXGBE_SET_OPACKETS(adapter, adapter->stats.pf.gptc);
 	IXGBE_SET_IBYTES(adapter, adapter->stats.pf.gorc);
 	IXGBE_SET_OBYTES(adapter, adapter->stats.pf.gotc);
 	IXGBE_SET_IMCASTS(adapter, adapter->stats.pf.mprc);
 	IXGBE_SET_OMCASTS(adapter, adapter->stats.pf.mptc);
 	IXGBE_SET_COLLISIONS(adapter, 0);
 	/* total_missed_rx is always 0 here (see note at its declaration) */
 	IXGBE_SET_IQDROPS(adapter, total_missed_rx);
 	/* Input errors are reported as CRC errors plus rlec */
 	IXGBE_SET_IERRORS(adapter, adapter->stats.pf.crcerrs
 	    + adapter->stats.pf.rlec);
 }
 
 #if __FreeBSD_version >= 1100036
 /*
  * Interface counter callback: returns the driver's cached value for
  * the requested counter.  Collisions are always 0, output queue drops
  * are summed over the TX rings' buf_rings, and any counter not handled
  * here is passed to if_get_counter_default().
  */
 static uint64_t
 ixgbe_get_counter(struct ifnet *ifp, ift_counter cnt)
 {
 	struct adapter	*sc = if_getsoftc(ifp);
 	uint64_t	value;
 
 	switch (cnt) {
 	case IFCOUNTER_IPACKETS:
 		value = sc->ipackets;
 		break;
 	case IFCOUNTER_OPACKETS:
 		value = sc->opackets;
 		break;
 	case IFCOUNTER_IBYTES:
 		value = sc->ibytes;
 		break;
 	case IFCOUNTER_OBYTES:
 		value = sc->obytes;
 		break;
 	case IFCOUNTER_IMCASTS:
 		value = sc->imcasts;
 		break;
 	case IFCOUNTER_OMCASTS:
 		value = sc->omcasts;
 		break;
 	case IFCOUNTER_COLLISIONS:
 		value = 0;
 		break;
 	case IFCOUNTER_IQDROPS:
 		value = sc->iqdrops;
 		break;
 	case IFCOUNTER_OQDROPS:
 		/* Sum of the per-ring buf_ring drop counts */
 		value = 0;
 		for (int q = 0; q < sc->num_queues; q++)
 			value += sc->tx_rings[q].br->br_drops;
 		break;
 	case IFCOUNTER_IERRORS:
 		value = sc->ierrors;
 		break;
 	default:
 		value = if_get_counter_default(ifp, cnt);
 		break;
 	}
 
 	return (value);
 }
 #endif
 
 /** ixgbe_sysctl_tdh_handler - Handler function
  *  Retrieves the TDH value from the hardware
  */
 static int 
 ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
 	if (!txr) return 0;
 
 	unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr)
 		return error;
 	return 0;
 }
 
 /** ixgbe_sysctl_tdt_handler - Handler function
  *  Retrieves the TDT value from the hardware
  */
 static int 
 ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
 	if (!txr) return 0;
 
 	unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr)
 		return error;
 	return 0;
 }
 
 /** ixgbe_sysctl_rdh_handler - Handler function
  *  Retrieves the RDH value from the hardware
  */
 static int 
 ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
 	if (!rxr) return 0;
 
 	unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr)
 		return error;
 	return 0;
 }
 
 /** ixgbe_sysctl_rdt_handler - Handler function
  *  Retrieves the RDT value from the hardware
  */
 static int 
 ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 
 	struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
 	if (!rxr) return 0;
 
 	unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr)
 		return error;
 	return 0;
 }
 
 static int
 ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
 	unsigned int reg, usec, rate;
 
 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
 	usec = ((reg & 0x0FF8) >> 3);
 	if (usec > 0)
 		rate = 500000 / usec;
 	else
 		rate = 0;
 	error = sysctl_handle_int(oidp, &rate, 0, req);
 	if (error || !req->newptr)
 		return error;
 	reg &= ~0xfff; /* default, no limitation */
 	ixgbe_max_interrupt_rate = 0;
 	if (rate > 0 && rate < 500000) {
 		if (rate < 1000)
 			rate = 1000;
 		ixgbe_max_interrupt_rate = rate;
 		reg |= ((4000000/rate) & 0xff8 );
 	}
 	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
 	return 0;
 }
 
 static void
 ixgbe_add_device_sysctls(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct sysctl_oid_list *child;
 	struct sysctl_ctx_list *ctx;
 
 	ctx = device_get_sysctl_ctx(dev);
 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
 
 	/* Sysctls for all devices */
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "fc",
 			CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
 			ixgbe_set_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC);
 
         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "enable_aim",
 			CTLFLAG_RW,
 			&ixgbe_enable_aim, 1, "Interrupt Moderation");
 
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "advertise_speed",
 			CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
 			ixgbe_set_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED);
 
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "thermal_test",
 			CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
 			ixgbe_sysctl_thermal_test, "I", "Thermal Test");
 
 #ifdef IXGBE_DEBUG
 	/* testing sysctls (for all devices) */
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "power_state",
 			CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
 			ixgbe_sysctl_power_state, "I", "PCI Power State");
 
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "print_rss_config",
 			CTLTYPE_STRING | CTLFLAG_RD, adapter, 0,
 			ixgbe_sysctl_print_rss_config, "A", "Prints RSS Configuration");
 #endif
 	/* for X550 series devices */
 	if (hw->mac.type >= ixgbe_mac_X550)
 		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "dmac",
 				CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
 				ixgbe_sysctl_dmac, "I", "DMA Coalesce");
 
 	/* for X552 backplane devices */
 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_KR) {
 		struct sysctl_oid *eee_node;
 		struct sysctl_oid_list *eee_list;
 
 		eee_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eee",
 					   CTLFLAG_RD, NULL,
 					   "Energy Efficient Ethernet sysctls");
 		eee_list = SYSCTL_CHILDREN(eee_node);
 
 		SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "enable",
 				CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
 				ixgbe_sysctl_eee_enable, "I",
 				"Enable or Disable EEE");
 
 		SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "negotiated",
 				CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
 				ixgbe_sysctl_eee_negotiated, "I",
 				"EEE negotiated on link");
 
 		SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_status",
 				CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
 				ixgbe_sysctl_eee_tx_lpi_status, "I",
 				"Whether or not TX link is in LPI state");
 
 		SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "rx_lpi_status",
 				CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
 				ixgbe_sysctl_eee_rx_lpi_status, "I",
 				"Whether or not RX link is in LPI state");
 
 		SYSCTL_ADD_PROC(ctx, eee_list, OID_AUTO, "tx_lpi_delay",
 				CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
 				ixgbe_sysctl_eee_tx_lpi_delay, "I",
 				"TX LPI entry delay in microseconds");
 	}
 
 	/* for WoL-capable devices */
 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
 		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wol_enable",
 				CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
 				ixgbe_sysctl_wol_enable, "I",
 				"Enable/Disable Wake on LAN");
 
 		SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wufc",
 				CTLTYPE_INT | CTLFLAG_RW, adapter, 0,
 				ixgbe_sysctl_wufc, "I",
 				"Enable/Disable Wake Up Filters");
 	}
 
 	/* for X552/X557-AT devices */
 	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) {
 		struct sysctl_oid *phy_node;
 		struct sysctl_oid_list *phy_list;
 
 		phy_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "phy",
 					   CTLFLAG_RD, NULL,
 					   "External PHY sysctls");
 		phy_list = SYSCTL_CHILDREN(phy_node);
 
 		SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "temp",
 				CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
 				ixgbe_sysctl_phy_temp, "I",
 				"Current External PHY Temperature (Celsius)");
 
 		SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "overtemp_occurred",
 				CTLTYPE_INT | CTLFLAG_RD, adapter, 0,
 				ixgbe_sysctl_phy_overtemp_occurred, "I",
 				"External PHY High Temperature Event Occurred");
 	}
 }
 
 /*
  * Add sysctl variables, one per statistic, to the system.
  *
  * Three groups are registered under the device's sysctl tree:
  *  - driver counters kept directly in struct adapter,
  *  - one "queue<N>" node per queue with that queue's TX and RX counters,
  *  - a "mac_stats" node exporting the fields of adapter->stats.pf.
  */
 static void
 ixgbe_add_hw_stats(struct adapter *adapter)
 {
 	device_t dev = adapter->dev;
 
 	struct tx_ring *txr = adapter->tx_rings;
 	struct rx_ring *rxr = adapter->rx_rings;
 
 	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
 	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
 	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
 	struct ixgbe_hw_stats *stats = &adapter->stats.pf;
 
 	struct sysctl_oid *stat_node, *queue_node;
 	struct sysctl_oid_list *stat_list, *queue_list;
 
 #define QUEUE_NAME_LEN 32
 	char namebuf[QUEUE_NAME_LEN];
 
 	/* Driver Statistics */
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
 	    CTLFLAG_RD, &adapter->dropped_pkts,
 	    "Driver dropped packets");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
 	    CTLFLAG_RD, &adapter->mbuf_defrag_failed,
 	    "m_defrag() failed");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
 	    CTLFLAG_RD, &adapter->watchdog_events,
 	    "Watchdog timeouts");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
 	    CTLFLAG_RD, &adapter->link_irq,
 	    "Link MSIX IRQ Handled");
 
 	/* Per-queue interrupt and transmit statistics */
 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 		    CTLFLAG_RD, NULL, "Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
 		    CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
 		    sizeof(&adapter->queues[i]),
 		    ixgbe_sysctl_interrupt_rate_handler, "IU",
 		    "Interrupt Rate");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
 		    CTLFLAG_RD, &(adapter->queues[i].irqs),
 		    "irqs on this queue");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
 		    CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
 		    ixgbe_sysctl_tdh_handler, "IU",
 		    "Transmit Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
 		    CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
 		    ixgbe_sysctl_tdt_handler, "IU",
 		    "Transmit Descriptor Tail");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
 		    CTLFLAG_RD, &txr->tso_tx,
 		    "TSO");
 		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
 		    CTLFLAG_RD, &txr->no_tx_dma_setup,
 		    "Driver tx dma failure in xmit");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
 		    CTLFLAG_RD, &txr->no_desc_avail,
 		    "Queue No Descriptor Available");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
 		    CTLFLAG_RD, &txr->total_packets,
 		    "Queue Packets Transmitted");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "br_drops",
 		    CTLFLAG_RD, &txr->br->br_drops,
 		    "Packets dropped in buf_ring");
 	}
 
 	/* Per-queue receive statistics */
 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
 		struct lro_ctrl *lro = &rxr->lro;
 
 		/*
 		 * Uses the same "queue<N>" name as the TX loop above so the
 		 * RX leaves are attached under that name; one lookup per
 		 * queue is enough.
 		 */
 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
 		    CTLFLAG_RD, NULL, "Queue Name");
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
 		    CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
 		    ixgbe_sysctl_rdh_handler, "IU",
 		    "Receive Descriptor Head");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
 		    CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
 		    ixgbe_sysctl_rdt_handler, "IU",
 		    "Receive Descriptor Tail");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
 		    CTLFLAG_RD, &rxr->rx_packets,
 		    "Queue Packets Received");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
 		    CTLFLAG_RD, &rxr->rx_bytes,
 		    "Queue Bytes Received");
 		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
 		    CTLFLAG_RD, &rxr->rx_copies,
 		    "Copied RX Frames");
 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
 		    CTLFLAG_RD, &lro->lro_queued, 0,
 		    "LRO Queued");
 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
 		    CTLFLAG_RD, &lro->lro_flushed, 0,
 		    "LRO Flushed");
 	}
 
 	/* MAC stats get their own sub node */
 
 	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
 	    CTLFLAG_RD, NULL, "MAC Statistics");
 	stat_list = SYSCTL_CHILDREN(stat_node);
 
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
 	    CTLFLAG_RD, &stats->crcerrs,
 	    "CRC Errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
 	    CTLFLAG_RD, &stats->illerrc,
 	    "Illegal Byte Errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
 	    CTLFLAG_RD, &stats->errbc,
 	    "Byte Errors");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
 	    CTLFLAG_RD, &stats->mspdc,
 	    "MAC Short Packets Discarded");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
 	    CTLFLAG_RD, &stats->mlfc,
 	    "MAC Local Faults");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
 	    CTLFLAG_RD, &stats->mrfc,
 	    "MAC Remote Faults");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
 	    CTLFLAG_RD, &stats->rlec,
 	    "Receive Length Errors");
 
 	/* Flow Control stats */
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
 	    CTLFLAG_RD, &stats->lxontxc,
 	    "Link XON Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
 	    CTLFLAG_RD, &stats->lxonrxc,
 	    "Link XON Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
 	    CTLFLAG_RD, &stats->lxofftxc,
 	    "Link XOFF Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
 	    CTLFLAG_RD, &stats->lxoffrxc,
 	    "Link XOFF Received");
 
 	/* Packet Reception Stats */
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
 	    CTLFLAG_RD, &stats->tor,
 	    "Total Octets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
 	    CTLFLAG_RD, &stats->gorc,
 	    "Good Octets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
 	    CTLFLAG_RD, &stats->tpr,
 	    "Total Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
 	    CTLFLAG_RD, &stats->gprc,
 	    "Good Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
 	    CTLFLAG_RD, &stats->mprc,
 	    "Multicast Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
 	    CTLFLAG_RD, &stats->bprc,
 	    "Broadcast Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
 	    CTLFLAG_RD, &stats->prc64,
 	    "64 byte frames received ");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
 	    CTLFLAG_RD, &stats->prc127,
 	    "65-127 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
 	    CTLFLAG_RD, &stats->prc255,
 	    "128-255 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
 	    CTLFLAG_RD, &stats->prc511,
 	    "256-511 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
 	    CTLFLAG_RD, &stats->prc1023,
 	    "512-1023 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
 	    CTLFLAG_RD, &stats->prc1522,
 	    "1024-1522 byte frames received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
 	    CTLFLAG_RD, &stats->ruc,
 	    "Receive Undersized");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
 	    CTLFLAG_RD, &stats->rfc,
 	    "Fragmented Packets Received ");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
 	    CTLFLAG_RD, &stats->roc,
 	    "Oversized Packets Received");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
 	    CTLFLAG_RD, &stats->rjc,
 	    "Received Jabber");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
 	    CTLFLAG_RD, &stats->mngprc,
 	    "Management Packets Received");
 	/* Dropped counter; mngptc is the transmitted counter exported below. */
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
 	    CTLFLAG_RD, &stats->mngpdc,
 	    "Management Packets Dropped");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
 	    CTLFLAG_RD, &stats->xec,
 	    "Checksum Errors");
 
 	/* Packet Transmission Stats */
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
 	    CTLFLAG_RD, &stats->gotc,
 	    "Good Octets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
 	    CTLFLAG_RD, &stats->tpt,
 	    "Total Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
 	    CTLFLAG_RD, &stats->gptc,
 	    "Good Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
 	    CTLFLAG_RD, &stats->bptc,
 	    "Broadcast Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
 	    CTLFLAG_RD, &stats->mptc,
 	    "Multicast Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
 	    CTLFLAG_RD, &stats->mngptc,
 	    "Management Packets Transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
 	    CTLFLAG_RD, &stats->ptc64,
 	    "64 byte frames transmitted ");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
 	    CTLFLAG_RD, &stats->ptc127,
 	    "65-127 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
 	    CTLFLAG_RD, &stats->ptc255,
 	    "128-255 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
 	    CTLFLAG_RD, &stats->ptc511,
 	    "256-511 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
 	    CTLFLAG_RD, &stats->ptc1023,
 	    "512-1023 byte frames transmitted");
 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
 	    CTLFLAG_RD, &stats->ptc1522,
 	    "1024-1522 byte frames transmitted");
 }
 
 /*
  * Store 'value' in *limit and publish *limit as a read/write integer
  * sysctl called 'name' (with the given description) directly under the
  * device's sysctl tree.
  */
 static void
 ixgbe_set_sysctl_value(struct adapter *adapter, const char *name,
     const char *description, int *limit, int value)
 {
 	struct sysctl_ctx_list *sctx;
 	struct sysctl_oid_list *top;
 
 	*limit = value;
 
 	sctx = device_get_sysctl_ctx(adapter->dev);
 	top = SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev));
 	SYSCTL_ADD_INT(sctx, top, OID_AUTO, name, CTLFLAG_RW, limit, value,
 	    description);
 }
 
 /*
 ** Set flow control using sysctl:
 ** Flow control values:
 ** 	0 - off
 **	1 - rx pause
 **	2 - tx pause
 **	3 - full
 */
 static int
 ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
 {
 	int error, last;
 	struct adapter *adapter = (struct adapter *) arg1;
 
 	last = adapter->fc;
 	error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
 	if ((error) || (req->newptr == NULL))
 		return (error);
 
 	/* Don't bother if it's not changed */
 	if (adapter->fc == last)
 		return (0);
 
 	switch (adapter->fc) {
 		case ixgbe_fc_rx_pause:
 		case ixgbe_fc_tx_pause:
 		case ixgbe_fc_full:
 			adapter->hw.fc.requested_mode = adapter->fc;
 			if (adapter->num_queues > 1)
 				ixgbe_disable_rx_drop(adapter);
 			break;
 		case ixgbe_fc_none:
 			adapter->hw.fc.requested_mode = ixgbe_fc_none;
 			if (adapter->num_queues > 1)
 				ixgbe_enable_rx_drop(adapter);
 			break;
 		default:
 			adapter->fc = last;
 			return (EINVAL);
 	}
 	/* Don't autoneg if forcing a value */
 	adapter->hw.fc.disable_fc_autoneg = TRUE;
 	ixgbe_fc_enable(&adapter->hw);
 	return error;
 }
 
 /*
 ** Control advertised link speed:
 **	Flags:
 **	0x1 - advertise 100 Mb
 **	0x2 - advertise 1G
 **	0x4 - advertise 10G
 **
 ** A write is rejected with ENODEV on backplane media and with EINVAL
 ** when the media type, the flag range or the 100Mb/MAC combination is
 ** not acceptable.  On success the link is set up again with the new
 ** speed mask and adapter->advertise is updated.
 */
 static int
 ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	device_t dev = adapter->dev;
 	ixgbe_link_speed link_speed = 0;
 	int rc, want;
 
 	want = adapter->advertise;
 	rc = sysctl_handle_int(oidp, &want, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	/* Backplane media cannot change speed */
 	if (hw->phy.media_type == ixgbe_media_type_backplane)
 		return (ENODEV);
 
 	/* Nothing to do when the value is unchanged */
 	if (want == adapter->advertise)
 		return (0);
 
 	if (hw->phy.media_type != ixgbe_media_type_copper &&
 	    !hw->phy.multispeed_fiber) {
 		device_printf(dev,
 		    "Advertised speed can only be set on copper or "
 		    "multispeed fiber media types.\n");
 		return (EINVAL);
 	}
 
 	if (want < 0x1 || want > 0x7) {
 		device_printf(dev,
 		    "Invalid advertised speed; valid modes are 0x1 through 0x7\n");
 		return (EINVAL);
 	}
 
 	if ((want & 0x1) != 0 &&
 	    hw->mac.type != ixgbe_mac_X540 &&
 	    hw->mac.type != ixgbe_mac_X550) {
 		device_printf(dev, "Set Advertise: 100Mb on X540/X550 only\n");
 		return (EINVAL);
 	}
 
 	/* Translate the flag bits into the link speed mask */
 	if ((want & 0x1) != 0)
 		link_speed |= IXGBE_LINK_SPEED_100_FULL;
 	if ((want & 0x2) != 0)
 		link_speed |= IXGBE_LINK_SPEED_1GB_FULL;
 	if ((want & 0x4) != 0)
 		link_speed |= IXGBE_LINK_SPEED_10GB_FULL;
 
 	hw->mac.autotry_restart = TRUE;
 	hw->mac.ops.setup_link(hw, link_speed, TRUE);
 	adapter->advertise = want;
 
 	return (rc);
 }
 
 /*
  * The following two sysctls are for X552/X557-AT devices;
  * they deal with the external PHY used in them.
  */
 static int
 ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter	*adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u16 reg;
 
 	if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) {
 		device_printf(adapter->dev,
 		    "Device has no supported external thermal sensor.\n");
 		return (ENODEV);
 	}
 
 	if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP,
 				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
 				      &reg)) {
 		device_printf(adapter->dev,
 		    "Error reading from PHY's current temperature register\n");
 		return (EAGAIN);
 	}
 
 	/* Shift temp for output */
 	reg = reg >> 8;
 
 	return (sysctl_handle_int(oidp, NULL, reg, req));
 }
 
 /*
  * Reports whether the current PHY temperature is over
  * the overtemp threshold.
  *  - This is reported directly from the PHY
  */
 static int
 ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter	*adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u16 reg;
 
 	if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) {
 		device_printf(adapter->dev,
 		    "Device has no supported external thermal sensor.\n");
 		return (ENODEV);
 	}
 
 	if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS,
 				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
 				      &reg)) {
 		device_printf(adapter->dev,
 		    "Error reading from PHY's temperature status register\n");
 		return (EAGAIN);
 	}
 
 	/* Get occurrence bit */
 	reg = !!(reg & 0x4000);
 	return (sysctl_handle_int(oidp, 0, reg, req));
 }
 
 /*
 ** Thermal Shutdown Trigger (internal MAC)
 **   - Set this to 1 to cause an overtemp event to occur
 **
 ** Any non-zero write sets the IXGBE_EICR_TS bit in the EICS register;
 ** reads always report 0.
 */
 static int
 ixgbe_sysctl_thermal_test(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter	*adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	int rc, trigger = 0;
 	u32 eics;
 
 	rc = sysctl_handle_int(oidp, &trigger, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	if (trigger == 0)
 		return (0);
 
 	eics = IXGBE_READ_REG(hw, IXGBE_EICS);
 	eics |= IXGBE_EICR_TS;
 	IXGBE_WRITE_REG(hw, IXGBE_EICS, eics);
 
 	return (0);
 }
 
 /*
 ** Manage DMA Coalescing.
 ** Control values:
 ** 	0/1 - off / on (use default value of 1000)
 **
 **	Legal timer values are:
 **	50,100,250,500,1000,2000,5000,10000
 **
 **	Turning off interrupt moderation will also turn this off.
 **
 ** Any other value is rejected with EINVAL.  After a successful change
 ** the interface is re-initialized if it is currently running.
 */
 static int
 ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	struct ifnet *ifp = adapter->ifp;
 	int		rc;
 	u32		timer;
 
 	timer = adapter->dmac;
 	rc = sysctl_handle_int(oidp, &timer, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	/* "1" is shorthand for "enable with the default timer" */
 	if (timer == 1)
 		timer = 1000;
 
 	switch (timer) {
 	case 0:		/* disabled */
 	case 50:
 	case 100:
 	case 250:
 	case 500:
 	case 1000:
 	case 2000:
 	case 5000:
 	case 10000:
 		adapter->dmac = timer;
 		break;
 	default:
 		/* Illegal value, leave the setting untouched */
 		return (EINVAL);
 	}
 
 	/* A running interface must be re-initialized to pick this up */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 		ixgbe_init(adapter);
 
 	return (0);
 }
 
 #ifdef IXGBE_DEBUG
 /**
  * Sysctl to test power states
  * Values:
  *	0      - set device to D0
  *	3      - set device to D3
  *	(none) - get current device power state
  *
  * Only the 0 -> 3 (suspend) and 3 -> 0 (resume) transitions are
  * accepted; any other change returns EINVAL.
  */
 static int
 ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	device_t dev = adapter->dev;
 	int from, to, rc;
 
 	from = pci_get_powerstate(dev);
 	to = from;
 
 	rc = sysctl_handle_int(oidp, &to, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	if (to == from)
 		return (0);
 
 	if (from == 0 && to == 3)
 		rc = DEVICE_SUSPEND(dev);
 	else if (from == 3 && to == 0)
 		rc = DEVICE_RESUME(dev);
 	else
 		return (EINVAL);
 
 	device_printf(dev, "New state: %d\n", pci_get_powerstate(dev));
 
 	return (rc);
 }
 #endif
 /*
  * Sysctl to enable/disable the WoL capability, if supported by the adapter.
  * Values:
  *	0 - disabled
  *	1 - enabled
  *
  * Any non-zero write counts as "enabled".  Enabling returns ENODEV when
  * adapter->wol_support is not set.
  */
 static int
 ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	int rc, want;
 
 	want = hw->wol_enabled;
 	rc = sysctl_handle_int(oidp, &want, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	/* Normalize to 0/1 before comparing with the current state */
 	want = (want != 0);
 	if (want == hw->wol_enabled)
 		return (0);
 
 	if (want && !adapter->wol_support)
 		return (ENODEV);
 
 	hw->wol_enabled = want;
 	return (0);
 }
 
 /*
  * Sysctl to enable/disable the Energy Efficient Ethernet capability,
  * if supported by the adapter.
  * Values:
  *	0 - disabled
  *	1 - enabled
  *
  * Any non-zero write counts as "enabled".  Enabling returns ENODEV when
  * the MAC has no setup_eee operation.  A change re-initializes the
  * interface if it is running.
  */
 static int
 ixgbe_sysctl_eee_enable(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ifnet *ifp = adapter->ifp;
 	int rc, want;
 
 	want = adapter->eee_enabled;
 	rc = sysctl_handle_int(oidp, &want, 0, req);
 	if (rc != 0 || req->newptr == NULL)
 		return (rc);
 
 	/* Normalize to 0/1 before comparing with the current state */
 	want = (want != 0);
 	if (want == adapter->eee_enabled)
 		return (0);
 
 	if (want && !hw->mac.ops.setup_eee)
 		return (ENODEV);
 
 	adapter->eee_enabled = want;
 
 	/* A running interface must be re-initialized to pick this up */
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
 		ixgbe_init(adapter);
 
 	return (0);
 }
 
 /*
  * Read-only sysctl indicating whether EEE support was negotiated
  * on the link.
  */
 static int
 ixgbe_sysctl_eee_negotiated(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	bool status;
 
 	status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) & IXGBE_EEE_STAT_NEG);
 
 	return (sysctl_handle_int(oidp, 0, status, req));
 }
 
 /*
  * Read-only sysctl indicating whether RX Link is in LPI state.
  */
 static int
 ixgbe_sysctl_eee_rx_lpi_status(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	bool status;
 
 	status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) &
 	    IXGBE_EEE_RX_LPI_STATUS);
 
 	return (sysctl_handle_int(oidp, 0, status, req));
 }
 
 /*
  * Read-only sysctl indicating whether TX Link is in LPI state.
  */
 static int
 ixgbe_sysctl_eee_tx_lpi_status(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	bool status;
 
 	status = !!(IXGBE_READ_REG(hw, IXGBE_EEE_STAT) &
 	    IXGBE_EEE_TX_LPI_STATUS);
 
 	return (sysctl_handle_int(oidp, 0, status, req));
 }
 
 /*
  * Read-only sysctl indicating TX Link LPI delay
  */
 static int
 ixgbe_sysctl_eee_tx_lpi_delay(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 reg;
 
 	reg = IXGBE_READ_REG(hw, IXGBE_EEE_SU);
 
 	return (sysctl_handle_int(oidp, 0, reg >> 26, req));
 }
 
 /*
  * Sysctl to enable/disable the types of packets that the
  * adapter will wake up on upon receipt.
  * WUFC - Wake Up Filter Control
  * Flags:
  *	0x1  - Link Status Change
  *	0x2  - Magic Packet
  *	0x4  - Direct Exact
  *	0x8  - Directed Multicast
  *	0x10 - Broadcast
  *	0x20 - ARP/IPv4 Request Packet
  *	0x40 - Direct IPv4 Packet
  *	0x80 - Direct IPv6 Packet
  *
  * Setting another flag will cause the sysctl to return an
  * error.
  *
  * Only the low 8 bits of adapter->wufc are replaced by a write; the
  * upper 24 bits keep whatever value they already had.
  */
 static int
 ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *) arg1;
 	int error = 0;
 	u32 new_wufc;
 
 	new_wufc = adapter->wufc;
 
 	error = sysctl_handle_int(oidp, &new_wufc, 0, req);
 	if ((error) || (req->newptr == NULL))
 		return (error);
 	if (new_wufc == adapter->wufc)
 		return (0);
 
 	/* Reject anything outside the eight documented flags */
 	if (new_wufc & 0xffffff00)
 		return (EINVAL);
 
 	/*
 	 * Keep the upper 24 bits of the current setting and replace the
 	 * low byte.  The preserve mask must not overlap the flag byte,
 	 * otherwise previously set flags could never be cleared.
 	 */
 	new_wufc &= 0xff;
 	new_wufc |= (0xffffff00 & adapter->wufc);
 	adapter->wufc = new_wufc;
 
 	return (0);
 }
 
 #ifdef IXGBE_DEBUG
 /*
  * Debug sysctl that prints the RSS redirection table, one register per
  * line.  The first 32 entries are read from RETA, any further entries
  * from ERETA.
  *
  * Returns ENOMEM when the output sbuf cannot be allocated, otherwise
  * the result of sbuf_finish() so that a failure to deliver the output
  * is reported to the caller instead of being silently dropped.
  */
 static int
 ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS)
 {
 	struct adapter *adapter = (struct adapter *)arg1;
 	struct ixgbe_hw *hw = &adapter->hw;
 	device_t dev = adapter->dev;
 	int error = 0, reta_size;
 	struct sbuf *buf;
 	u32 reg;
 
 	buf = sbuf_new_for_sysctl(NULL, NULL, 128, req);
 	if (!buf) {
 		device_printf(dev, "Could not allocate sbuf for output.\n");
 		return (ENOMEM);
 	}
 
 	/* Set table size based on MAC */
 	switch (hw->mac.type) {
 	case ixgbe_mac_X550:
 	case ixgbe_mac_X550EM_x:
 		reta_size = 128;
 		break;
 	default:
 		reta_size = 32;
 		break;
 	}
 
 	/* Print out the redirection table */
 	sbuf_cat(buf, "\n");
 	for (int i = 0; i < reta_size; i++) {
 		if (i < 32) {
 			reg = IXGBE_READ_REG(hw, IXGBE_RETA(i));
 			sbuf_printf(buf, "RETA(%2d): 0x%08x\n", i, reg);
 		} else {
 			reg = IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32));
 			sbuf_printf(buf, "ERETA(%2d): 0x%08x\n", i - 32, reg);
 		}
 	}
 
 	// TODO: print more config
 
 	error = sbuf_finish(buf);
 	if (error)
 		device_printf(dev, "Error finishing sbuf: %d\n", error);
 
 	sbuf_delete(buf);
 	return (error);
 }
 #endif /* IXGBE_DEBUG */
 
 /*
 ** Enable the hardware to drop packets when the buffer is
 ** full. This is useful when multiqueue, so that no single
 ** queue being full stalls the entire RX engine. We only
 ** enable this when Multiqueue AND when Flow Control is
 ** disabled.
 */
 static void
 ixgbe_enable_rx_drop(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct rx_ring *ring;
 	u32 srrctl;
 
 	/* Set DROP_EN in the SRRCTL register of every RX ring */
 	for (int q = 0; q < adapter->num_queues; q++) {
 		ring = &adapter->rx_rings[q];
 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(ring->me));
 		srrctl |= IXGBE_SRRCTL_DROP_EN;
 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(ring->me), srrctl);
 	}
 #ifdef PCI_IOV
 	/* enable drop for each vf */
 	for (int vfn = 0; vfn < adapter->num_vfs; vfn++) {
 		IXGBE_WRITE_REG(hw, IXGBE_QDE,
 		    (IXGBE_QDE_WRITE | (vfn << IXGBE_QDE_IDX_SHIFT) |
 		    IXGBE_QDE_ENABLE));
 	}
 #endif
 }
 
 /*
 ** Counterpart of ixgbe_enable_rx_drop(): clear DROP_EN in the SRRCTL
 ** register of every RX ring and, with PCI_IOV, write a QDE entry
 ** without the enable bit for each VF.
 */
 static void
 ixgbe_disable_rx_drop(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct rx_ring *ring;
 	u32 srrctl;
 
 	for (int q = 0; q < adapter->num_queues; q++) {
 		ring = &adapter->rx_rings[q];
 		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(ring->me));
 		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
 		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(ring->me), srrctl);
 	}
 #ifdef PCI_IOV
 	/* disable drop for each vf */
 	for (int vfn = 0; vfn < adapter->num_vfs; vfn++) {
 		IXGBE_WRITE_REG(hw, IXGBE_QDE,
 		    (IXGBE_QDE_WRITE | (vfn << IXGBE_QDE_IDX_SHIFT)));
 	}
 #endif
 }
 
 /*
  * Write the given queue bitmap to the interrupt cause set register(s).
  * 82598 uses the single EICS register (masked with
  * IXGBE_EIMS_RTX_QUEUE); the later MACs listed below split the 64-bit
  * map across EICS_EX(0) and EICS_EX(1).  Other MAC types are ignored.
  */
 static void
 ixgbe_rearm_queues(struct adapter *adapter, u64 queues)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 lo, hi;
 
 	switch (hw->mac.type) {
 	case ixgbe_mac_82598EB:
 		lo = (IXGBE_EIMS_RTX_QUEUE & queues);
 		IXGBE_WRITE_REG(hw, IXGBE_EICS, lo);
 		break;
 	case ixgbe_mac_82599EB:
 	case ixgbe_mac_X540:
 	case ixgbe_mac_X550:
 	case ixgbe_mac_X550EM_x:
 		/* Low word first, then the high word */
 		lo = (queues & 0xFFFFFFFF);
 		IXGBE_WRITE_REG(hw, IXGBE_EICS_EX(0), lo);
 		hi = (queues >> 32);
 		IXGBE_WRITE_REG(hw, IXGBE_EICS_EX(1), hi);
 		break;
 	default:
 		break;
 	}
 }
 
 #ifdef PCI_IOV
 
 /*
 ** Support functions for SRIOV/VF management
 */
 
 /*
  * Send IXGBE_PF_CONTROL_MSG to every VF that has IXGBE_VF_ACTIVE set.
  */
 static void
 ixgbe_ping_all_vfs(struct adapter *adapter)
 {
 
 	for (int n = 0; n < adapter->num_vfs; n++) {
 		struct ixgbe_vf *vf = &adapter->vfs[n];
 
 		if ((vf->flags & IXGBE_VF_ACTIVE) == 0)
 			continue;
 		ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG);
 	}
 }
 
 
 /*
  * Record 'tag' as the VF's VLAN tag and program the VF pool's VMOLR and
  * VMVIR registers to match.  A tag of 0 means "no default tag".
  */
 static void
 ixgbe_vf_set_default_vlan(struct adapter *adapter, struct ixgbe_vf *vf,
     uint16_t tag)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	uint32_t vmolr, vmvir;
 
 	vf->vlan_tag = tag;
 
 	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf->pool));
 
 	/*
 	 * No packets that only pass inexact filters, and no multicast
 	 * promiscuous mode.
 	 */
 	vmolr &= ~(IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_ROPE);
 	vmolr &= ~IXGBE_VMOLR_MPE;
 
 	/* Broadcasts are always accepted. */
 	vmolr |= IXGBE_VMOLR_BAM;
 
 	if (tag != 0) {
 		/* Untagged traffic is refused ... */
 		vmolr &= ~IXGBE_VMOLR_AUPE;
 
 		/* ... and all traffic is tagged with the provided tag. */
 		vmvir = (tag | IXGBE_VMVIR_VLANA_DEFAULT);
 	} else {
 		/*
 		 * The VM may tag outgoing traffic itself; no default tag.
 		 * (AUPE is deliberately left as read here; the original
 		 * code has the "accept untagged" assignment commented out.)
 		 */
 		vmvir = 0;
 	}
 
 	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf->pool), vmolr);
 	IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf->pool), vmvir);
 }
 
 
 /*
  * Decide whether the VF's maximum frame size can coexist with the PF's.
  * Returns TRUE when reception may be enabled for the VF, FALSE when the
  * combination is not supported.
  */
 static boolean_t
 ixgbe_vf_frame_size_compatible(struct adapter *adapter, struct ixgbe_vf *vf)
 {
 
 	/*
 	 * Frame size compatibility between PF and VF is only a problem on
 	 * 82599-based cards.  X540 and later support any combination of jumbo
 	 * frames on PFs and VFs.
 	 */
 	if (adapter->hw.mac.type != ixgbe_mac_82599EB)
 		return (TRUE);
 
 	switch (vf->api_ver) {
 	case IXGBE_API_VER_1_0:
 	case IXGBE_API_VER_UNKNOWN:
 		/*
 		 * On legacy (1.0 and older) VF versions, we don't support jumbo
 		 * frames on either the PF or the VF.
 		 */
 		if (adapter->max_frame_size > ETHER_MAX_LEN ||
 		    vf->max_frame_size > ETHER_MAX_LEN)
 			return (FALSE);
 
 		return (TRUE);
 	case IXGBE_API_VER_1_1:
 	default:
 		/*
 		 * 1.1 or later VF versions always work if they aren't using
 		 * jumbo frames.
 		 */
 		if (vf->max_frame_size <= ETHER_MAX_LEN)
 			return (TRUE);
 
 		/*
 		 * Jumbo frames only work with VFs if the PF is also using jumbo
 		 * frames: a jumbo VF on a non-jumbo PF is incompatible.
 		 */
 		if (adapter->max_frame_size <= ETHER_MAX_LEN)
 			return (FALSE);
 
 		return (TRUE);
 	}
 }
 
 
 /*
  * Bring a VF back to its post-reset state: re-apply its default VLAN,
  * clear its receive address register entry and forget the negotiated
  * mailbox API version.
  */
 static void
 ixgbe_process_vf_reset(struct adapter *adapter, struct ixgbe_vf *vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 
 	ixgbe_vf_set_default_vlan(adapter, vf, vf->default_vlan);
 
 	// XXX clear multicast addresses
 
 	ixgbe_clear_rar(hw, vf->rar_index);
 
 	vf->api_ver = IXGBE_API_VER_UNKNOWN;
 }
 
 
 /*
  * Set the VF pool's bit in the matching VFTE register.
  */
 static void
 ixgbe_vf_enable_transmit(struct adapter *adapter, struct ixgbe_vf *vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	uint32_t reg_idx, enable_bits;
 
 	reg_idx = IXGBE_VF_INDEX(vf->pool);
 
 	enable_bits = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_idx));
 	enable_bits |= IXGBE_VF_BIT(vf->pool);
 	IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_idx), enable_bits);
 }
 
 
 /*
  * Update the VF pool's bit in the matching VFRE register: set it when
  * ixgbe_vf_frame_size_compatible() accepts the VF's frame size, clear
  * it otherwise.
  */
 static void
 ixgbe_vf_enable_receive(struct adapter *adapter, struct ixgbe_vf *vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	uint32_t reg_idx, enable_bits;
 
 	reg_idx = IXGBE_VF_INDEX(vf->pool);
 
 	enable_bits = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_idx));
 	if (!ixgbe_vf_frame_size_compatible(adapter, vf))
 		enable_bits &= ~IXGBE_VF_BIT(vf->pool);
 	else
 		enable_bits |= IXGBE_VF_BIT(vf->pool);
 	IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_idx), enable_bits);
 }
 
 
 /*
  * Handle a VF reset request: reset the VF's state, re-program its MAC
  * address if it has a valid one, enable TX/RX for its pool, mark it
  * clear-to-send and reply through the mailbox.
  *
  * The reply carries the reset type with ACK/NACK and CTS flags in word
  * 0, the VF's MAC address starting at word 1 and the multicast filter
  * type in word 3.  'msg' is not examined.
  */
 static void
 ixgbe_vf_reset_msg(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg)
 {
 	struct ixgbe_hw *hw;
 	uint32_t ack;
 	/*
 	 * Zero the reply: the 6-byte MAC copy only partially fills the
 	 * words it lands in, and the remainder must not carry stack
 	 * contents to the VF.
 	 */
 	uint32_t resp[IXGBE_VF_PERMADDR_MSG_LEN] = { 0 };
 
 	hw = &adapter->hw;
 
 	ixgbe_process_vf_reset(adapter, vf);
 
 	if (ixgbe_validate_mac_addr(vf->ether_addr) == 0) {
 		ixgbe_set_rar(&adapter->hw, vf->rar_index,
 		    vf->ether_addr, vf->pool, TRUE);
 		ack = IXGBE_VT_MSGTYPE_ACK;
 	} else
 		ack = IXGBE_VT_MSGTYPE_NACK;
 
 	ixgbe_vf_enable_transmit(adapter, vf);
 	ixgbe_vf_enable_receive(adapter, vf);
 
 	vf->flags |= IXGBE_VF_CTS;
 
 	resp[0] = IXGBE_VF_RESET | ack | IXGBE_VT_MSGTYPE_CTS;
 	bcopy(vf->ether_addr, &resp[1], ETHER_ADDR_LEN);
 	resp[3] = hw->mac.mc_filter_type;
 	ixgbe_write_mbx(hw, resp, IXGBE_VF_PERMADDR_MSG_LEN, vf->pool);
 }
 
 
 /*
  * Handle a VF request to set its MAC address (taken from msg[1]
  * onward).  The request is NACKed when the VF lacks IXGBE_VF_CAP_MAC
  * and the address differs from its current one, or when the address
  * fails validation.  Otherwise the address is stored, written to the
  * VF's receive address register entry, and the request is ACKed.
  */
 static void
 ixgbe_vf_set_mac(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg)
 {
 	uint8_t *new_addr = (uint8_t*)&msg[1];
 
 	/* Permission check first, then address sanity. */
 	if ((!(vf->flags & IXGBE_VF_CAP_MAC) &&
 	    ixgbe_vf_mac_changed(vf, new_addr)) ||
 	    ixgbe_validate_mac_addr(new_addr) != 0) {
 		ixgbe_send_vf_nack(adapter, vf, msg[0]);
 		return;
 	}
 
 	bcopy(new_addr, vf->ether_addr, ETHER_ADDR_LEN);
 
 	ixgbe_set_rar(&adapter->hw, vf->rar_index, vf->ether_addr,
 	    vf->pool, TRUE);
 
 	ixgbe_send_vf_ack(adapter, vf, msg[0]);
 }
 
 
 /*
 ** VF multicast addresses are set by using the appropriate bit in
 ** 1 of 128 32 bit addresses (4096 possible).
 **
 ** The entry count is taken from the message info field of msg[0] and
 ** capped at IXGBE_MAX_VF_MC; the 16-bit hash values follow from msg[1].
 ** Each hash is remembered in the VF and its bit is set in the MTA.
 ** ROMPE is then set in the pool's VMOLR and the request is ACKed.
 */
 static void
 ixgbe_vf_set_mc_addr(struct adapter *adapter, struct ixgbe_vf *vf, u32 *msg)
 {
 	u16	*list = (u16*)&msg[1];
 	int	entries;
 	u32	vmolr, vec_bit, vec_reg, mta_reg;
 
 	entries = (msg[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT;
 	entries = min(entries, IXGBE_MAX_VF_MC);
 
 	vmolr = IXGBE_READ_REG(&adapter->hw, IXGBE_VMOLR(vf->pool));
 
 	vf->num_mc_hashes = entries;
 
 	/* Set the appropriate MTA bit */
 	for (int i = 0; i < entries; i++) {
 		vf->mc_hash[i] = list[i];
 		/* Bits 11:5 select the MTA register, bits 4:0 the bit in it */
 		vec_reg = (vf->mc_hash[i] >> 5) & 0x7F;
 		vec_bit = vf->mc_hash[i] & 0x1F;
 		mta_reg = IXGBE_READ_REG(&adapter->hw, IXGBE_MTA(vec_reg));
 		/* Unsigned shift: vec_bit may be 31 */
 		mta_reg |= (1U << vec_bit);
 		IXGBE_WRITE_REG(&adapter->hw, IXGBE_MTA(vec_reg), mta_reg);
 	}
 
 	vmolr |= IXGBE_VMOLR_ROMPE;
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_VMOLR(vf->pool), vmolr);
 	ixgbe_send_vf_ack(adapter, vf, msg[0]);
 }
 
 
 /*
  * Handle a VF request to add or remove a VLAN filter.
  *
  * The enable/disable flag comes from the MSGINFO field of msg[0] and the
  * VLAN id from msg[1].  The request is NACKed when the VF lacks
  * IXGBE_VF_CAP_VLAN or when it tries to enable VLAN id 0; otherwise the
  * VFTA is updated for the VF's pool and the request is ACKed.
  */
 static void
 ixgbe_vf_set_vlan(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg)
 {
 	uint16_t vlan_id = msg[1] & IXGBE_VLVF_VLANID_MASK;
 	int on = IXGBE_VT_MSGINFO(msg[0]);
 	int refused;

 	/* It is illegal to enable vlan tag 0. */
 	refused = !(vf->flags & IXGBE_VF_CAP_VLAN) ||
 	    (vlan_id == 0 && on != 0);

 	if (refused) {
 		ixgbe_send_vf_nack(adapter, vf, msg[0]);
 		return;
 	}

 	ixgbe_set_vfta(&adapter->hw, vlan_id, vf->pool, on);
 	ixgbe_send_vf_ack(adapter, vf, msg[0]);
 }
 
 
 /*
  * Handle a VF request to set its maximum frame size (msg[1], which
  * includes the CRC).
  *
  * Out-of-range requests are deliberately ACKed without changing anything.
  * A valid request updates the VF's max_frame_size, folds it into the
  * adapter-wide maximum, re-evaluates whether the VF may receive, and raises
  * the MFS field of MHADD if it is below the adapter's max frame size.
  */
 static void
 ixgbe_vf_set_lpe(struct adapter *adapter, struct ixgbe_vf *vf, uint32_t *msg)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	uint32_t frame_len = msg[1];
 	uint32_t reg, hw_limit;

 	/* We intentionally ACK invalid LPE requests. */
 	if (frame_len < ETHER_CRC_LEN)
 		goto ack;

 	frame_len -= ETHER_CRC_LEN;
 	if (frame_len > IXGBE_MAX_FRAME_SIZE)
 		goto ack;

 	vf->max_frame_size = frame_len;
 	ixgbe_update_max_frame(adapter, vf->max_frame_size);

 	/*
 	 * We might have to disable reception to this VF if the frame size is
 	 * not compatible with the config on the PF.
 	 */
 	ixgbe_vf_enable_receive(adapter, vf);

 	reg = IXGBE_READ_REG(hw, IXGBE_MHADD);
 	hw_limit = (reg & IXGBE_MHADD_MFS_MASK) >> IXGBE_MHADD_MFS_SHIFT;

 	/* Only ever grow the hardware limit, never shrink it. */
 	if (hw_limit < adapter->max_frame_size) {
 		reg &= ~IXGBE_MHADD_MFS_MASK;
 		reg |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
 		IXGBE_WRITE_REG(hw, IXGBE_MHADD, reg);
 	}

 ack:
 	ixgbe_send_vf_ack(adapter, vf, msg[0]);
 }
 
 
 /*
  * Handle a VF SET_MACVLAN request.  Not implemented: every request is
  * NACKed.
  */
 static void
 ixgbe_vf_set_macvlan(struct adapter *adapter, struct ixgbe_vf *vf,
     uint32_t *msg)
 {
 	//XXX implement this; until then always refuse the request
 	ixgbe_send_vf_nack(adapter, vf, msg[0]);
 }
 
 
 /*
  * Handle a VF request to negotiate the mailbox API version (msg[1]).
  *
  * Versions 1.0 and 1.1 are accepted: the version is stored in vf->api_ver
  * and the request is ACKed.  Anything else resets vf->api_ver to
  * IXGBE_API_VER_UNKNOWN and is NACKed.
  */
 static void
 ixgbe_vf_api_negotiate(struct adapter *adapter, struct ixgbe_vf *vf,
     uint32_t *msg)
 {
 	uint32_t requested = msg[1];

 	if (requested == IXGBE_API_VER_1_0 ||
 	    requested == IXGBE_API_VER_1_1) {
 		vf->api_ver = requested;
 		ixgbe_send_vf_ack(adapter, vf, msg[0]);
 		return;
 	}

 	vf->api_ver = IXGBE_API_VER_UNKNOWN;
 	ixgbe_send_vf_nack(adapter, vf, msg[0]);
 }
 
 
 /*
  * Handle a VF GET_QUEUES request.
  *
  * The request is NACKed unless the VF has negotiated an API version that
  * supports it (anything other than 1.0 or unknown).  Otherwise the reply
  * reports the per-VF TX/RX queue count for the current IOV mode, whether a
  * default VLAN is configured for the VF, and default queue 0.
  */
 static void
 ixgbe_vf_get_queues(struct adapter *adapter, struct ixgbe_vf *vf,
     uint32_t *msg)
 {
 	struct ixgbe_hw *hw;
 	uint32_t resp[IXGBE_VF_GET_QUEUES_RESP_LEN];
 	int num_queues;

 	hw = &adapter->hw;

 	/*
 	 * GET_QUEUES is not supported on pre-1.1 APIs.  The check must use
 	 * the version negotiated by this VF, not the message word.
 	 */
 	switch (vf->api_ver) {
 	case IXGBE_API_VER_1_0:
 	case IXGBE_API_VER_UNKNOWN:
 		ixgbe_send_vf_nack(adapter, vf, msg[0]);
 		return;
 	}

 	resp[0] = IXGBE_VF_GET_QUEUES | IXGBE_VT_MSGTYPE_ACK |
 	    IXGBE_VT_MSGTYPE_CTS;

 	num_queues = ixgbe_vf_queues(ixgbe_get_iov_mode(adapter));
 	resp[IXGBE_VF_TX_QUEUES] = num_queues;
 	resp[IXGBE_VF_RX_QUEUES] = num_queues;
 	resp[IXGBE_VF_TRANS_VLAN] = (vf->default_vlan != 0);
 	resp[IXGBE_VF_DEF_QUEUE] = 0;

 	ixgbe_write_mbx(hw, resp, IXGBE_VF_GET_QUEUES_RESP_LEN, vf->pool);
 }
 
 
 /*
  * Read one mailbox message from a VF and dispatch it.
  *
  * A read failure is silently ignored.  A bare IXGBE_VF_RESET message is
  * always handled; every other message is NACKed until the VF has the
  * IXGBE_VF_CTS flag set.  Unknown message types are NACKed.
  */
 static void
 ixgbe_process_vf_msg(struct adapter *adapter, struct ixgbe_vf *vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	uint32_t mbx_msg[IXGBE_VFMAILBOX_SIZE];
 	uint32_t hdr;

 	if (ixgbe_read_mbx(hw, mbx_msg, IXGBE_VFMAILBOX_SIZE, vf->pool) != 0)
 		return;
 	hdr = mbx_msg[0];

 	CTR3(KTR_MALLOC, "%s: received msg %x from %d",
 	    adapter->ifp->if_xname, hdr, vf->pool);

 	if (hdr == IXGBE_VF_RESET) {
 		ixgbe_vf_reset_msg(adapter, vf, mbx_msg);
 		return;
 	}

 	/* Nothing but a reset is accepted before the VF is clear to send. */
 	if ((vf->flags & IXGBE_VF_CTS) == 0) {
 		ixgbe_send_vf_nack(adapter, vf, hdr);
 		return;
 	}

 	switch (hdr & IXGBE_VT_MSG_MASK) {
 	case IXGBE_VF_SET_MAC_ADDR:
 		ixgbe_vf_set_mac(adapter, vf, mbx_msg);
 		break;
 	case IXGBE_VF_SET_MULTICAST:
 		ixgbe_vf_set_mc_addr(adapter, vf, mbx_msg);
 		break;
 	case IXGBE_VF_SET_VLAN:
 		ixgbe_vf_set_vlan(adapter, vf, mbx_msg);
 		break;
 	case IXGBE_VF_SET_LPE:
 		ixgbe_vf_set_lpe(adapter, vf, mbx_msg);
 		break;
 	case IXGBE_VF_SET_MACVLAN:
 		ixgbe_vf_set_macvlan(adapter, vf, mbx_msg);
 		break;
 	case IXGBE_VF_API_NEGOTIATE:
 		ixgbe_vf_api_negotiate(adapter, vf, mbx_msg);
 		break;
 	case IXGBE_VF_GET_QUEUES:
 		ixgbe_vf_get_queues(adapter, vf, mbx_msg);
 		break;
 	default:
 		ixgbe_send_vf_nack(adapter, vf, hdr);
 		break;
 	}
 }
 
 
 /*
  * Tasklet for handling VF -> PF mailbox messages.
  *
  * Under the core lock, every active VF is polled in turn for a pending
  * reset, a pending message and a pending ack, in that order.
  */
 static void
 ixgbe_handle_mbx(void *context, int pending)
 {
 	struct adapter *adapter = context;
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ixgbe_vf *cur;
 	int idx;

 	IXGBE_CORE_LOCK(adapter);
 	for (idx = 0; idx < adapter->num_vfs; idx++) {
 		cur = &adapter->vfs[idx];

 		if ((cur->flags & IXGBE_VF_ACTIVE) == 0)
 			continue;

 		if (ixgbe_check_for_rst(hw, cur->pool) == 0)
 			ixgbe_process_vf_reset(adapter, cur);

 		if (ixgbe_check_for_msg(hw, cur->pool) == 0)
 			ixgbe_process_vf_msg(adapter, cur);

 		if (ixgbe_check_for_ack(hw, cur->pool) == 0)
 			ixgbe_process_vf_ack(adapter, cur);
 	}
 	IXGBE_CORE_UNLOCK(adapter);
 }
 
 
 /*
  * Prepare the adapter for num_vfs virtual functions.
  *
  * Returns ENOSPC when num_vfs exceeds what the resulting IOV mode supports,
  * ENOMEM when the VF state array cannot be allocated, and 0 on success.  On
  * either failure adapter->num_vfs is reset to 0.  On success the adapter is
  * re-initialised under the core lock.
  */
 static int
 ixgbe_init_iov(device_t dev, u16 num_vfs, const nvlist_t *config)
 {
 	struct adapter *adapter = device_get_softc(dev);
 	enum ixgbe_iov_mode mode;
 	int retval;

 	/* num_vfs is stored before the mode is queried. */
 	adapter->num_vfs = num_vfs;
 	mode = ixgbe_get_iov_mode(adapter);

 	if (num_vfs > ixgbe_max_vfs(mode)) {
 		adapter->num_vfs = 0;
 		return (ENOSPC);
 	}

 	IXGBE_CORE_LOCK(adapter);

 	adapter->vfs = malloc(sizeof(*adapter->vfs) * num_vfs, M_IXGBE,
 	    M_NOWAIT | M_ZERO);

 	if (adapter->vfs != NULL) {
 		ixgbe_init_locked(adapter);
 		retval = 0;
 	} else {
 		adapter->num_vfs = 0;
 		retval = ENOMEM;
 	}

 	IXGBE_CORE_UNLOCK(adapter);

 	return (retval);
 }
 
 
 /*
  * Tear down SR-IOV: leave rx/tx enabled only for the PF's pool, clear
  * VT_CTL, and release the VF state array.  Runs under the core lock.
  */
 static void
 ixgbe_uninit_iov(device_t dev)
 {
 	struct adapter *adapter = device_get_softc(dev);
 	struct ixgbe_hw *hw = &adapter->hw;
 	uint32_t pf_idx, other_idx;

 	IXGBE_CORE_LOCK(adapter);

 	/* Enable rx/tx for the PF and disable it for all VFs. */
 	pf_idx = IXGBE_VF_INDEX(adapter->pool);
 	IXGBE_WRITE_REG(hw, IXGBE_VFRE(pf_idx), IXGBE_VF_BIT(adapter->pool));
 	IXGBE_WRITE_REG(hw, IXGBE_VFTE(pf_idx), IXGBE_VF_BIT(adapter->pool));

 	/* The register that does not hold the PF's bit is cleared entirely. */
 	other_idx = (pf_idx == 0) ? 1 : 0;
 	IXGBE_WRITE_REG(hw, IXGBE_VFRE(other_idx), 0);
 	IXGBE_WRITE_REG(hw, IXGBE_VFTE(other_idx), 0);

 	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, 0);

 	free(adapter->vfs, M_IXGBE);
 	adapter->vfs = NULL;
 	adapter->num_vfs = 0;

 	IXGBE_CORE_UNLOCK(adapter);
 }
 
 
 /*
  * Program the hardware for the current SR-IOV mode and initialise every VF.
  *
  * Does nothing when the mode is IXGBE_NO_VM.  Otherwise, with the core lock
  * held, it selects the 32- or 64-pool layout in MRQC, MTQC, GCR_EXT and
  * GPIE, enables rx/tx for the PF's pool, enables the VT loopback bit in
  * PFDTXGSWC, writes VT_CTL with the PF's pool, and finally calls
  * ixgbe_init_vf() on each VF slot.  Any other mode panics.
  */
 static void
 ixgbe_initialize_iov(struct adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	uint32_t mrqc, mtqc, vt_ctl, vf_reg, gcr_ext, gpie;
 	enum ixgbe_iov_mode mode;
 	int i;

 	mode = ixgbe_get_iov_mode(adapter);
 	if (mode == IXGBE_NO_VM)
 		return;

 	IXGBE_CORE_LOCK_ASSERT(adapter);

 	/* Receive queue layout. */
 	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
 	mrqc &= ~IXGBE_MRQC_MRQE_MASK;

 	switch (mode) {
 	case IXGBE_64_VM:
 		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
 		break;
 	case IXGBE_32_VM:
 		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
 		break;
 	default:
 		panic("Unexpected SR-IOV mode %d", mode);
 	}
 	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);

 	/* Transmit queue layout. */
 	mtqc = IXGBE_MTQC_VT_ENA;
 	switch (mode) {
 	case IXGBE_64_VM:
 		mtqc |= IXGBE_MTQC_64VF;
 		break;
 	case IXGBE_32_VM:
 		mtqc |= IXGBE_MTQC_32VF;
 		break;
 	default:
 		panic("Unexpected SR-IOV mode %d", mode);
 	}
 	IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);

 	gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
 	gcr_ext |= IXGBE_GCR_EXT_MSIX_EN;
 	gcr_ext &= ~IXGBE_GCR_EXT_VT_MODE_MASK;
 	switch (mode) {
 	case IXGBE_64_VM:
 		gcr_ext |= IXGBE_GCR_EXT_VT_MODE_64;
 		break;
 	case IXGBE_32_VM:
 		gcr_ext |= IXGBE_GCR_EXT_VT_MODE_32;
 		break;
 	default:
 		panic("Unexpected SR-IOV mode %d", mode);
 	}
 	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);

 	/*
 	 * Clear the previous VT mode in GPIE itself before selecting the
 	 * new one; otherwise stale mode bits would be ORed with the new mode.
 	 */
 	gpie = IXGBE_READ_REG(hw, IXGBE_GPIE);
 	gpie &= ~IXGBE_GPIE_VTMODE_MASK;
 	switch (mode) {
 	case IXGBE_64_VM:
 		gpie |= IXGBE_GPIE_VTMODE_64;
 		break;
 	case IXGBE_32_VM:
 		gpie |= IXGBE_GPIE_VTMODE_32;
 		break;
 	default:
 		panic("Unexpected SR-IOV mode %d", mode);
 	}
 	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);

 	/* Enable rx/tx for the PF. */
 	vf_reg = IXGBE_VF_INDEX(adapter->pool);
 	IXGBE_WRITE_REG(hw, IXGBE_VFRE(vf_reg),
 	    IXGBE_VF_BIT(adapter->pool));
 	IXGBE_WRITE_REG(hw, IXGBE_VFTE(vf_reg),
 	    IXGBE_VF_BIT(adapter->pool));

 	/* Allow VM-to-VM communication. */
 	IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);

 	vt_ctl = IXGBE_VT_CTL_VT_ENABLE | IXGBE_VT_CTL_REPLEN;
 	vt_ctl |= (adapter->pool << IXGBE_VT_CTL_POOL_SHIFT);
 	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vt_ctl);

 	for (i = 0; i < adapter->num_vfs; i++)
 		ixgbe_init_vf(adapter, &adapter->vfs[i]);
 }
 
 
 /*
  * Fold the max frame size of every active VF into the adapter-wide
  * maximum.  The core lock must be held.
  */
 static void
 ixgbe_recalculate_max_frame(struct adapter *adapter)
 {
 	struct ixgbe_vf *cur;
 	int idx;

 	IXGBE_CORE_LOCK_ASSERT(adapter);

 	for (idx = 0; idx < adapter->num_vfs; idx++) {
 		cur = &adapter->vfs[idx];
 		if ((cur->flags & IXGBE_VF_ACTIVE) == 0)
 			continue;
 		ixgbe_update_max_frame(adapter, cur->max_frame_size);
 	}
 }
 
 
 /*
  * Bring one VF into service; the core lock must be held.
  *
  * Inactive VFs are left untouched.  For an active VF this sets the VF's
  * bit in PFMBIMR, applies its default VLAN, programs its RAR entry when
  * the stored MAC address validates, enables transmit and receive, and
  * sends the VF an IXGBE_PF_CONTROL_MSG.
  */
 static void
 ixgbe_init_vf(struct adapter *adapter, struct ixgbe_vf *vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	uint32_t reg_idx, mbimr;

 	IXGBE_CORE_LOCK_ASSERT(adapter);

 	if ((vf->flags & IXGBE_VF_ACTIVE) == 0)
 		return;

 	/* Set this VF's bit in the mailbox interrupt mask register. */
 	reg_idx = IXGBE_VF_INDEX(vf->pool);
 	mbimr = IXGBE_READ_REG(hw, IXGBE_PFMBIMR(reg_idx));
 	mbimr |= IXGBE_VF_BIT(vf->pool);
 	IXGBE_WRITE_REG(hw, IXGBE_PFMBIMR(reg_idx), mbimr);

 	ixgbe_vf_set_default_vlan(adapter, vf, vf->vlan_tag);

 	// XXX multicast addresses

 	if (ixgbe_validate_mac_addr(vf->ether_addr) == 0)
 		ixgbe_set_rar(hw, vf->rar_index, vf->ether_addr, vf->pool,
 		    TRUE);

 	ixgbe_vf_enable_transmit(adapter, vf);
 	ixgbe_vf_enable_receive(adapter, vf);

 	ixgbe_send_vf_msg(adapter, vf, IXGBE_PF_CONTROL_MSG);
 }
 
 /*
  * Configure VF number vfnum from its nvlist configuration and activate it.
  *
  * The VF gets pool vfnum and RAR entry vfnum + 1, no default VLAN and a
  * max frame size of ETHER_MAX_LEN.  If "mac-addr" is present it is copied
  * into the VF state, and IXGBE_VF_CAP_MAC is granted only when
  * "allow-set-mac" is true; with no administrator-supplied address the VF
  * is always allowed to choose its own.  Always returns 0.
  */
 static int
 ixgbe_add_vf(device_t dev, u16 vfnum, const nvlist_t *config)
 {
 	struct adapter *adapter;
 	struct ixgbe_vf *vf;
 	const void *mac;

 	adapter = device_get_softc(dev);

 	KASSERT(vfnum < adapter->num_vfs, ("VF index %d is out of range %d",
 	    vfnum, adapter->num_vfs));

 	IXGBE_CORE_LOCK(adapter);
 	vf = &adapter->vfs[vfnum];
 	vf->pool = vfnum;

 	/* RAR[0] is used by the PF so use vfnum + 1 for VF RAR. */
 	vf->rar_index = vfnum + 1;
 	vf->default_vlan = 0;
 	vf->max_frame_size = ETHER_MAX_LEN;
 	ixgbe_update_max_frame(adapter, vf->max_frame_size);

 	if (nvlist_exists_binary(config, "mac-addr")) {
 		mac = nvlist_get_binary(config, "mac-addr", NULL);
 		bcopy(mac, vf->ether_addr, ETHER_ADDR_LEN);
 		if (nvlist_get_bool(config, "allow-set-mac"))
 			vf->flags |= IXGBE_VF_CAP_MAC;
 	} else {
 		/*
 		 * If the administrator has not specified a MAC address then
 		 * we must allow the VF to choose one.
 		 */
 		vf->flags |= IXGBE_VF_CAP_MAC;
 	}

 	/*
 	 * OR the active bit in: a plain assignment here would discard the
 	 * IXGBE_VF_CAP_MAC capability granted just above.
 	 */
 	vf->flags |= IXGBE_VF_ACTIVE;

 	ixgbe_init_vf(adapter, vf);
 	IXGBE_CORE_UNLOCK(adapter);

 	return (0);
 }
 #endif /* PCI_IOV */
 
Index: projects/release-pkg/sys/dev/netmap/if_em_netmap.h
===================================================================
--- projects/release-pkg/sys/dev/netmap/if_em_netmap.h	(revision 293335)
+++ projects/release-pkg/sys/dev/netmap/if_em_netmap.h	(revision 293336)
@@ -1,329 +1,329 @@
 /*
  * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * $FreeBSD$
  *
  * netmap support for: em.
  *
  * For more details on netmap support please see ixgbe_netmap.h
  */
 
 
 #include <net/netmap.h>
 #include <sys/selinfo.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>    /* vtophys ? */
 #include <dev/netmap/netmap_kern.h>
 
 
 /*
  * Block the driver taskqueues and wait for queued tasks to finish.
  * With MSI-X (adapter->msix > 1) each tx and rx ring has its own
  * taskqueue; otherwise the single adapter taskqueue is blocked and its
  * link and queue tasks are drained.
  *
  * XXX do we need to block/unblock the tasks ?
  */
 static void
 em_netmap_block_tasks(struct adapter *adapter)
 {
 	struct tx_ring *txr;
 	struct rx_ring *rxr;
 	int q;

 	if (!(adapter->msix > 1)) {	/* legacy */
 		taskqueue_block(adapter->tq);
 		taskqueue_drain(adapter->tq, &adapter->link_task);
 		taskqueue_drain(adapter->tq, &adapter->que_task);
 		return;
 	}

 	/* MSIX */
 	for (q = 0; q < adapter->num_queues; q++) {
 		txr = &adapter->tx_rings[q];
 		rxr = &adapter->rx_rings[q];

 		taskqueue_block(txr->tq);
 		taskqueue_drain(txr->tq, &txr->tx_task);
 		taskqueue_block(rxr->tq);
 		taskqueue_drain(rxr->tq, &rxr->rx_task);
 	}
 }
 
 
 /*
  * Unblock the taskqueues blocked by em_netmap_block_tasks(): the per-ring
  * queues with MSI-X, the single adapter queue otherwise.
  */
 static void
 em_netmap_unblock_tasks(struct adapter *adapter)
 {
 	int q;

 	if (!(adapter->msix > 1)) {	/* legacy */
 		taskqueue_unblock(adapter->tq);
 		return;
 	}

 	for (q = 0; q < adapter->num_queues; q++) {
 		taskqueue_unblock(adapter->tx_rings[q].tq);
 		taskqueue_unblock(adapter->rx_rings[q].tq);
 	}
 }
 
 
 /*
  * Register/unregister. We are already under netmap lock.
  *
  * The interface is quiesced (interrupts off, not running, tasks blocked),
  * the native netmap flags are set or cleared according to onoff, and the
  * adapter is re-initialised.  Returns 0 if the interface is running
  * afterwards and 1 otherwise.
  */
 static int
 em_netmap_reg(struct netmap_adapter *na, int onoff)
 {
 	struct ifnet *ifp = na->ifp;
 	struct adapter *sc = ifp->if_softc;

 	EM_CORE_LOCK(sc);
 	em_disable_intr(sc);

 	/* Tell the stack that the interface is no longer active */
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

 	em_netmap_block_tasks(sc);

 	/* enable or disable flags and callbacks in na and ifp */
 	if (onoff)
 		nm_set_native_flags(na);
 	else
 		nm_clear_native_flags(na);

 	em_init_locked(sc);	/* also enable intr */
 	em_netmap_unblock_tasks(sc);
 	EM_CORE_UNLOCK(sc);

 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
 		return (0);
 	return (1);
 }
 
 
 /*
  * Reconcile kernel and user view of the transmit ring.
  *
  * First part: every slot from kring->nr_hwcur up to (not including)
  * kring->rhead is written into the NIC transmit descriptor ring, then the
  * tail register (TDT) is advanced.  Second part: when NAF_FORCE_RECLAIM is
  * set in flags or nm_kr_txempty() is true, the head register (TDH) is read
  * to record completed transmissions in txr->next_to_clean and
  * kring->nr_hwtail.
  *
  * Returns 0 on the paths visible here (NM_CHECK_ADDR_LEN is a macro defined
  * elsewhere and may exit early).
  */
 static int
 em_netmap_txsync(struct netmap_kring *kring, int flags)
 {
 	struct netmap_adapter *na = kring->na;
 	struct ifnet *ifp = na->ifp;
 	struct netmap_ring *ring = kring->ring;
 	u_int nm_i;	/* index into the netmap ring */
 	u_int nic_i;	/* index into the NIC ring */
 	u_int n;
 	u_int const lim = kring->nkr_num_slots - 1;
 	u_int const head = kring->rhead;
 	/* generate an interrupt approximately every half ring */
 	u_int report_frequency = kring->nkr_num_slots >> 1;

 	/* device-specific */
 	struct adapter *adapter = ifp->if_softc;
 	struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];

 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 			BUS_DMASYNC_POSTREAD);

 	/*
 	 * First part: process new packets to send.
 	 */

 	nm_i = kring->nr_hwcur;
 	if (nm_i != head) {	/* we have new packets to send */
 		nic_i = netmap_idx_k2n(kring, nm_i);
 		for (n = 0; nm_i != head; n++) {
 			struct netmap_slot *slot = &ring->slot[nm_i];
 			u_int len = slot->len;
 			uint64_t paddr;
 			void *addr = PNMB(na, slot, &paddr);

 			/* device-specific */
 			struct e1000_tx_desc *curr = &txr->tx_base[nic_i];
-			struct em_buffer *txbuf = &txr->tx_buffers[nic_i];
+			struct em_txbuffer *txbuf = &txr->tx_buffers[nic_i];
 			/*
 			 * Request a status report (RS) when the slot asks for
 			 * one, and at NIC indices 0 and report_frequency.
 			 */
 			int flags = (slot->flags & NS_REPORT ||
 				nic_i == 0 || nic_i == report_frequency) ?
 				E1000_TXD_CMD_RS : 0;

 			NM_CHECK_ADDR_LEN(na, addr, len);

 			if (slot->flags & NS_BUF_CHANGED) {
 				curr->buffer_addr = htole64(paddr);
 				/* buffer has changed, reload map */
 				netmap_reload_map(na, txr->txtag, txbuf->map, addr);
 			}
 			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);

 			/* Fill the slot in the NIC ring. */
 			curr->upper.data = 0;
 			curr->lower.data = htole32(adapter->txd_cmd | len |
 				(E1000_TXD_CMD_EOP | flags) );
 			bus_dmamap_sync(txr->txtag, txbuf->map,
 				BUS_DMASYNC_PREWRITE);

 			nm_i = nm_next(nm_i, lim);
 			nic_i = nm_next(nic_i, lim);
 		}
 		kring->nr_hwcur = head;

 		/* synchronize the NIC ring */
 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
 			BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

 		/* (re)start the tx unit up to slot nic_i (excluded) */
 		E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
 	}

 	/*
 	 * Second part: reclaim buffers for completed transmissions.
 	 */
 	if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
 		/* record completed transmissions using TDH */
 		nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id));
 		if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
 			D("TDH wrap %d", nic_i);
 			nic_i -= kring->nkr_num_slots;
 		}
 		if (nic_i != txr->next_to_clean) {
 			txr->next_to_clean = nic_i;
 			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
 		}
 	}

 	return 0;
 }
 
 
 /*
  * Reconcile kernel and user view of the receive ring.
  *
  * First part: when netmap_no_pendintr is set, or an update is forced by
  * NAF_FORCE_READ or NKR_PENDINTR, descriptors with the DD status bit set
  * are imported into the netmap ring starting at rxr->next_to_check.
  * Second part: slots released by userspace (kring->nr_hwcur up to
  * kring->rhead) are handed back to the NIC and the tail register (RDT) is
  * advanced, always leaving one slot free.
  *
  * Returns 0 on success, or the result of netmap_ring_reinit() when head is
  * out of range or a released slot refers to the base netmap buffer.
  */
 static int
 em_netmap_rxsync(struct netmap_kring *kring, int flags)
 {
 	struct netmap_adapter *na = kring->na;
 	struct ifnet *ifp = na->ifp;
 	struct netmap_ring *ring = kring->ring;
 	u_int nm_i;	/* index into the netmap ring */
 	u_int nic_i;	/* index into the NIC ring */
 	u_int n;
 	u_int const lim = kring->nkr_num_slots - 1;
 	u_int const head = kring->rhead;
 	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;

 	/* device-specific */
 	struct adapter *adapter = ifp->if_softc;
 	struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];

 	if (head > lim)
 		return netmap_ring_reinit(kring);

 	/* XXX check sync modes */
 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 			BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

 	/*
 	 * First part: import newly received packets.
 	 */
 	if (netmap_no_pendintr || force_update) {
 		uint16_t slot_flags = kring->nkr_slot_flags;

 		nic_i = rxr->next_to_check;
 		nm_i = netmap_idx_n2k(kring, nic_i);

 		for (n = 0; ; n++) { // XXX no need to count
-			struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
-			uint32_t staterr = le32toh(curr->status);
+			union e1000_rx_desc_extended *curr = &rxr->rx_base[nic_i];
+			uint32_t staterr = le32toh(curr->wb.upper.status_error);

 			/* Stop at the first descriptor without DD set. */
 			if ((staterr & E1000_RXD_STAT_DD) == 0)
 				break;
-			ring->slot[nm_i].len = le16toh(curr->length);
+			ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
 			ring->slot[nm_i].flags = slot_flags;
 			bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map,
 				BUS_DMASYNC_POSTREAD);
 			nm_i = nm_next(nm_i, lim);
 			/* make sure next_to_refresh follows next_to_check */
 			rxr->next_to_refresh = nic_i;	// XXX
 			nic_i = nm_next(nic_i, lim);
 		}
 		if (n) { /* update the state variables */
 			rxr->next_to_check = nic_i;
 			kring->nr_hwtail = nm_i;
 		}
 		kring->nr_kflags &= ~NKR_PENDINTR;
 	}

 	/*
 	 * Second part: skip past packets that userspace has released.
 	 */
 	nm_i = kring->nr_hwcur;
 	if (nm_i != head) {
 		nic_i = netmap_idx_k2n(kring, nm_i);
 		for (n = 0; nm_i != head; n++) {
 			struct netmap_slot *slot = &ring->slot[nm_i];
 			uint64_t paddr;
 			void *addr = PNMB(na, slot, &paddr);

-			struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
-			struct em_buffer *rxbuf = &rxr->rx_buffers[nic_i];
+			union e1000_rx_desc_extended *curr = &rxr->rx_base[nic_i];
+			struct em_rxbuffer *rxbuf = &rxr->rx_buffers[nic_i];

 			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
 				goto ring_reset;

 			if (slot->flags & NS_BUF_CHANGED) {
 				/* buffer has changed, reload map */
-				curr->buffer_addr = htole64(paddr);
+				curr->read.buffer_addr = htole64(paddr);
 				netmap_reload_map(na, rxr->rxtag, rxbuf->map, addr);
 				slot->flags &= ~NS_BUF_CHANGED;
 			}
 			/* Clear the status so the descriptor reads as not done. */
-			curr->status = 0;
+			curr->wb.upper.status_error = 0;
 			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
 			    BUS_DMASYNC_PREREAD);
 			nm_i = nm_next(nm_i, lim);
 			nic_i = nm_next(nic_i, lim);
 		}
 		kring->nr_hwcur = head;

 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 		/*
 		 * IMPORTANT: we must leave one free slot in the ring,
 		 * so move nic_i back by one unit
 		 */
 		nic_i = nm_prev(nic_i, lim);
 		E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
 	}

 	return 0;

 ring_reset:
 	return netmap_ring_reinit(kring);
 }
 
 
 /*
  * Describe this adapter to netmap: ring and descriptor counts plus the
  * register/txsync/rxsync callbacks, then attach.  The descriptor is built
  * on the stack and passed to netmap_attach().
  */
 static void
 em_netmap_attach(struct adapter *adapter)
 {
 	struct netmap_adapter na;

 	bzero(&na, sizeof(na));

 	na.ifp = adapter->ifp;
 	na.na_flags = NAF_BDG_MAYSLEEP;

 	/* ring geometry */
 	na.num_tx_desc = adapter->num_tx_desc;
 	na.num_rx_desc = adapter->num_rx_desc;
 	na.num_rx_rings = adapter->num_queues;
 	na.num_tx_rings = na.num_rx_rings;

 	/* callbacks */
 	na.nm_register = em_netmap_reg;
 	na.nm_txsync = em_netmap_txsync;
 	na.nm_rxsync = em_netmap_rxsync;

 	netmap_attach(&na);
 }
 
 /* end of file */
Index: projects/release-pkg/sys/dev/nvd/nvd.c
===================================================================
--- projects/release-pkg/sys/dev/nvd/nvd.c	(revision 293335)
+++ projects/release-pkg/sys/dev/nvd/nvd.c	(revision 293336)
@@ -1,394 +1,411 @@
 /*-
  * Copyright (C) 2012-2013 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 
 #include <geom/geom.h>
 #include <geom/geom_disk.h>
 
 #include <dev/nvme/nvme.h>
 
 #define NVD_STR		"nvd"
 
 struct nvd_disk;
 
 static disk_ioctl_t nvd_ioctl;
 static disk_strategy_t nvd_strategy;
 
+static void nvd_done(void *arg, const struct nvme_completion *cpl);
+
 static void *nvd_new_disk(struct nvme_namespace *ns, void *ctrlr);
 static void destroy_geom_disk(struct nvd_disk *ndisk);
 
 static void *nvd_new_controller(struct nvme_controller *ctrlr);
 static void nvd_controller_fail(void *ctrlr);
 
 static int nvd_load(void);
 static void nvd_unload(void);
 
 MALLOC_DEFINE(M_NVD, "nvd", "nvd(4) allocations");
 
 struct nvme_consumer *consumer_handle;
 
 /* Per-namespace state for one nvd(4) disk. */
 struct nvd_disk {

 	struct bio_queue_head	bioq;		/* bios waiting for the taskqueue */
 	struct task		bioqtask;	/* runs nvd_bioq_process() */
 	struct mtx		bioqlock;	/* protects bioq */

 	struct disk		*disk;
 	struct taskqueue	*tq;
 	struct nvme_namespace	*ns;

 	uint32_t		cur_depth;	/* bios submitted and not yet done */
+	uint32_t		ordered_in_flight;	/* BIO_ORDERED bios not yet done */

 	TAILQ_ENTRY(nvd_disk)	global_tailq;	/* link on the global disk_head */
 	TAILQ_ENTRY(nvd_disk)	ctrlr_tailq;	/* link on the controller's disk_head */
 };
 
 /* Per-controller state: the list of nvd disks created for its namespaces. */
 struct nvd_controller {

 	TAILQ_ENTRY(nvd_controller)	tailq;		/* link on ctrlr_head */
 	TAILQ_HEAD(, nvd_disk)		disk_head;	/* disks of this controller */
 };
 
 static TAILQ_HEAD(, nvd_controller)	ctrlr_head;
 static TAILQ_HEAD(disk_list, nvd_disk)	disk_head;
 
 /*
  * Module event handler: MOD_LOAD returns the result of nvd_load(),
  * MOD_UNLOAD runs nvd_unload() and returns 0, and every other event is
  * ignored with a return of 0.
  */
 static int nvd_modevent(module_t mod, int type, void *arg)
 {

 	if (type == MOD_LOAD)
 		return (nvd_load());

 	if (type == MOD_UNLOAD)
 		nvd_unload();

 	return (0);
 }
 
 moduledata_t nvd_mod = {
 	NVD_STR,
 	(modeventhand_t)nvd_modevent,
 	0
 };
 
 DECLARE_MODULE(nvd, nvd_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
 MODULE_VERSION(nvd, 1);
 MODULE_DEPEND(nvd, nvme, 1, 1, 1);
 
 /*
  * Initialise the global controller and disk lists and register nvd as an
  * NVMe consumer.  Returns 0 when a consumer handle was obtained, -1
  * otherwise.
  */
 static int
 nvd_load()
 {

 	TAILQ_INIT(&ctrlr_head);
 	TAILQ_INIT(&disk_head);

 	consumer_handle = nvme_register_consumer(nvd_new_disk,
 	    nvd_new_controller, NULL, nvd_controller_fail);

 	if (consumer_handle == NULL)
 		return (-1);

 	return (0);
 }
 
 static void
 nvd_unload()
 {
 	struct nvd_controller	*ctrlr;
 	struct nvd_disk		*disk;
 
 	while (!TAILQ_EMPTY(&ctrlr_head)) {
 		ctrlr = TAILQ_FIRST(&ctrlr_head);
 		TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
 		free(ctrlr, M_NVD);
 	}
 
 	while (!TAILQ_EMPTY(&disk_head)) {
 		disk = TAILQ_FIRST(&disk_head);
 		TAILQ_REMOVE(&disk_head, disk, global_tailq);
 		destroy_geom_disk(disk);
 		free(disk, M_NVD);
 	}
 
 	nvme_unregister_consumer(consumer_handle);
 }
 
 /*
  * Submit one bio to the NVMe namespace, counting it in cur_depth.
  *
  * If nvme_ns_bio_process() fails, the accounting is undone (including the
  * ordered_in_flight count for a BIO_ORDERED bio), the bio is completed with
  * that error, and -1 is returned.  Returns 0 when the bio was accepted;
  * nvd_done() is passed as the completion callback.
  */
+static int
+nvd_bio_submit(struct nvd_disk *ndisk, struct bio *bp)
+{
+	int err;
+
+	bp->bio_driver1 = NULL;
+	atomic_add_int(&ndisk->cur_depth, 1);
+	err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
+	if (err) {
+		atomic_add_int(&ndisk->cur_depth, -1);
+		if (__predict_false(bp->bio_flags & BIO_ORDERED))
+			atomic_add_int(&ndisk->ordered_in_flight, -1);
+		bp->bio_error = err;
+		bp->bio_flags |= BIO_ERROR;
+		bp->bio_resid = bp->bio_bcount;	/* nothing was transferred */
+		biodone(bp);
+		return (-1);
+	}
+
+	return (0);
+}
+
 /*
  * Disk strategy routine (installed as d_strategy by nvd_new_disk()).
  *
  * A BIO_ORDERED bio bumps ordered_in_flight.  While no ordered bio is in
  * flight the bio is submitted directly from the caller's context; otherwise
  * it is appended to the bio queue and the taskqueue is kicked so that
  * nvd_bioq_process() submits it in order.
  */
 static void
 nvd_strategy(struct bio *bp)
 {
 	struct nvd_disk *ndisk;

 	ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;

+	if (__predict_false(bp->bio_flags & BIO_ORDERED))
+		atomic_add_int(&ndisk->ordered_in_flight, 1);
+
+	if (__predict_true(ndisk->ordered_in_flight == 0)) {
+		nvd_bio_submit(ndisk, bp);
+		return;
+	}
+
+	/*
+	 * There are ordered bios in flight, so we need to submit
+	 *  bios through the task queue to enforce ordering.
+	 */
 	mtx_lock(&ndisk->bioqlock);
 	bioq_insert_tail(&ndisk->bioq, bp);
 	mtx_unlock(&ndisk->bioqlock);
 	taskqueue_enqueue(ndisk->tq, &ndisk->bioqtask);
 }
 
 /*
  * Disk ioctl handler.  No commands are implemented: every request fails
  * with EIO.
  */
 static int
 nvd_ioctl(struct disk *ndisk, u_long cmd, void *data, int fflag,
     struct thread *td)
 {

 	return (EIO);
 }
 
 /*
  * Completion callback passed to nvme_ns_bio_process(); arg is the bio.
  * Drops the bio from cur_depth (and from ordered_in_flight when it is
  * BIO_ORDERED) and completes it with biodone().
  */
 static void
 nvd_done(void *arg, const struct nvme_completion *cpl)
 {
 	struct bio *bp;
 	struct nvd_disk *ndisk;

 	bp = (struct bio *)arg;

 	ndisk = bp->bio_disk->d_drv1;

 	atomic_add_int(&ndisk->cur_depth, -1);
+	if (__predict_false(bp->bio_flags & BIO_ORDERED))
+		atomic_add_int(&ndisk->ordered_in_flight, -1);

 	biodone(bp);
 }
 
 /*
  * Taskqueue handler: drain the disk's bio queue, submitting each bio in
  * turn.  arg is the struct nvd_disk.  After an ordered bio is submitted the
  * loop waits for cur_depth to drop to zero before taking the next bio.
  *
  * NOTE(review): bp->bio_flags is read after the bio has been handed to the
  * nvme driver; confirm the bio cannot already have been completed and
  * released at that point.
  */
 static void
 nvd_bioq_process(void *arg, int pending)
 {
 	struct nvd_disk *ndisk = arg;
 	struct bio *bp;
-	int err;

 	for (;;) {
 		mtx_lock(&ndisk->bioqlock);
 		bp = bioq_takefirst(&ndisk->bioq);
 		mtx_unlock(&ndisk->bioqlock);
 		if (bp == NULL)
 			break;

-#ifdef BIO_ORDERED
-		/*
-		 * BIO_ORDERED flag dictates that all outstanding bios
-		 *  must be completed before processing the bio with
-		 *  BIO_ORDERED flag set.
-		 */
-		if (bp->bio_flags & BIO_ORDERED) {
-			while (ndisk->cur_depth > 0) {
-				pause("nvd flush", 1);
-			}
+		if (nvd_bio_submit(ndisk, bp) != 0) {
+			continue;
 		}
-#endif

-		bp->bio_driver1 = NULL;
-		atomic_add_int(&ndisk->cur_depth, 1);
-
-		err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
-
-		if (err) {
-			atomic_add_int(&ndisk->cur_depth, -1);
-			bp->bio_error = err;
-			bp->bio_flags |= BIO_ERROR;
-			bp->bio_resid = bp->bio_bcount;
-			biodone(bp);
-		}
-
 #ifdef BIO_ORDERED
 		/*
 		 * BIO_ORDERED flag dictates that the bio with BIO_ORDERED
 		 *  flag set must be completed before proceeding with
 		 *  additional bios.
 		 */
 		if (bp->bio_flags & BIO_ORDERED) {
 			while (ndisk->cur_depth > 0) {
 				pause("nvd flush", 1);
 			}
 		}
 #endif
 	}
 }
 
 /*
  * Consumer callback for a new NVMe controller: allocate a zeroed
  * nvd_controller record, link it onto the global controller list and
  * return it.
  */
 static void *
 nvd_new_controller(struct nvme_controller *ctrlr)
 {
 	struct nvd_controller	*nc;

 	nc = malloc(sizeof(*nc), M_NVD, M_ZERO | M_WAITOK);
 	TAILQ_INIT(&nc->disk_head);

 	TAILQ_INSERT_TAIL(&ctrlr_head, nc, tailq);

 	return (nc);
 }
 
 /*
  * Consumer callback for a new NVMe namespace: allocate an nvd_disk, fill
  * in a GEOM disk from the namespace's geometry and capabilities, create
  * its bio queue and taskqueue, link it onto the global and per-controller
  * lists and publish it with disk_create().  ctrlr_arg is the
  * nvd_controller the disk belongs to.  Always returns NULL.
  */
 static void *
 nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg)
 {
 	uint8_t			descr[NVME_MODEL_NUMBER_LENGTH+1];
 	struct nvd_disk		*ndisk;
 	struct disk		*disk;
 	struct nvd_controller	*ctrlr = ctrlr_arg;

 	ndisk = malloc(sizeof(struct nvd_disk), M_NVD, M_ZERO | M_WAITOK);

 	disk = disk_alloc();
 	disk->d_strategy = nvd_strategy;
 	disk->d_ioctl = nvd_ioctl;
 	disk->d_name = NVD_STR;
 	disk->d_drv1 = ndisk;

 	disk->d_maxsize = nvme_ns_get_max_io_xfer_size(ns);
 	disk->d_sectorsize = nvme_ns_get_sector_size(ns);
 	disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
 	disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns);
 	disk->d_stripesize = nvme_ns_get_optimal_sector_size(ns);

 	/* Unit numbers follow the last disk on the global list. */
 	if (TAILQ_EMPTY(&disk_head))
 		disk->d_unit = 0;
 	else
 		disk->d_unit =
 		    TAILQ_LAST(&disk_head, disk_list)->disk->d_unit + 1;

-	disk->d_flags = 0;
+	disk->d_flags = DISKFLAG_DIRECT_COMPLETION;

 	if (nvme_ns_get_flags(ns) & NVME_NS_DEALLOCATE_SUPPORTED)
 		disk->d_flags |= DISKFLAG_CANDELETE;

 	if (nvme_ns_get_flags(ns) & NVME_NS_FLUSH_SUPPORTED)
 		disk->d_flags |= DISKFLAG_CANFLUSHCACHE;

 /* ifdef used here to ease porting to stable branches at a later point. */
 #ifdef DISKFLAG_UNMAPPED_BIO
 	disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
 #endif

 	/*
 	 * d_ident and d_descr are both far bigger than the length of either
 	 *  the serial or model number strings.
 	 */
 	nvme_strvis(disk->d_ident, nvme_ns_get_serial_number(ns),
 	    sizeof(disk->d_ident), NVME_SERIAL_NUMBER_LENGTH);

 	nvme_strvis(descr, nvme_ns_get_model_number(ns), sizeof(descr),
 	    NVME_MODEL_NUMBER_LENGTH);

 #if __FreeBSD_version >= 900034
 	/* NOTE(review): the bound is the source size, not sizeof(d_descr). */
 	strlcpy(disk->d_descr, descr, sizeof(descr));
 #endif

 	ndisk->ns = ns;
 	ndisk->disk = disk;
 	ndisk->cur_depth = 0;
+	ndisk->ordered_in_flight = 0;

 	mtx_init(&ndisk->bioqlock, "NVD bioq lock", NULL, MTX_DEF);
 	bioq_init(&ndisk->bioq);

 	/* One taskqueue thread per disk runs nvd_bioq_process(). */
 	TASK_INIT(&ndisk->bioqtask, 0, nvd_bioq_process, ndisk);
 	ndisk->tq = taskqueue_create("nvd_taskq", M_WAITOK,
 	    taskqueue_thread_enqueue, &ndisk->tq);
 	taskqueue_start_threads(&ndisk->tq, 1, PI_DISK, "nvd taskq");

 	TAILQ_INSERT_TAIL(&disk_head, ndisk, global_tailq);
 	TAILQ_INSERT_TAIL(&ctrlr->disk_head, ndisk, ctrlr_tailq);

 	disk_create(disk, DISK_VERSION);

 	printf(NVD_STR"%u: <%s> NVMe namespace\n", disk->d_unit, descr);
 	printf(NVD_STR"%u: %juMB (%ju %u byte sectors)\n", disk->d_unit,
 		(uintmax_t)disk->d_mediasize / (1024*1024),
 		(uintmax_t)disk->d_mediasize / disk->d_sectorsize,
 		disk->d_sectorsize);

 	return (NULL);
 }
 
 /*
  * Tear down one nvd disk: free its taskqueue, destroy the GEOM disk, fail
  * every bio still waiting on the queue with EIO, report how many were
  * lost, and destroy the queue lock.  The nvd_disk itself is not freed
  * here.
  */
 static void
 destroy_geom_disk(struct nvd_disk *ndisk)
 {
 	struct bio	*pending;
 	uint32_t	unit;
 	int		lost = 0;

 	/* Remember the unit number before the disk goes away. */
 	unit = ndisk->disk->d_unit;
 	taskqueue_free(ndisk->tq);

 	disk_destroy(ndisk->disk);

 	mtx_lock(&ndisk->bioqlock);
 	while ((pending = bioq_takefirst(&ndisk->bioq)) != NULL) {
 		pending->bio_error = EIO;
 		pending->bio_flags |= BIO_ERROR;
 		pending->bio_resid = pending->bio_bcount;
 		lost++;
 		biodone(pending);
 	}

 	printf(NVD_STR"%u: lost device - %d outstanding\n", unit, lost);
 	printf(NVD_STR"%u: removing device entry\n", unit);

 	mtx_unlock(&ndisk->bioqlock);

 	mtx_destroy(&ndisk->bioqlock);
 }
 
 static void
 nvd_controller_fail(void *ctrlr_arg)
 {
 	struct nvd_controller	*ctrlr = ctrlr_arg;
 	struct nvd_disk		*disk;
 
 	while (!TAILQ_EMPTY(&ctrlr->disk_head)) {
 		disk = TAILQ_FIRST(&ctrlr->disk_head);
 		TAILQ_REMOVE(&disk_head, disk, global_tailq);
 		TAILQ_REMOVE(&ctrlr->disk_head, disk, ctrlr_tailq);
 		destroy_geom_disk(disk);
 		free(disk, M_NVD);
 	}
 
 	TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
 	free(ctrlr, M_NVD);
 }
 
Index: projects/release-pkg/sys/dev/nvme/nvme.c
===================================================================
--- projects/release-pkg/sys/dev/nvme/nvme.c	(revision 293335)
+++ projects/release-pkg/sys/dev/nvme/nvme.c	(revision 293336)
@@ -1,456 +1,454 @@
 /*-
  * Copyright (C) 2012-2014 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/module.h>
 
 #include <vm/uma.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include "nvme_private.h"
 
 struct nvme_consumer {
 	uint32_t		id;
 	nvme_cons_ns_fn_t	ns_fn;
 	nvme_cons_ctrlr_fn_t	ctrlr_fn;
 	nvme_cons_async_fn_t	async_fn;
 	nvme_cons_fail_fn_t	fail_fn;
 };
 
 struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];
 #define	INVALID_CONSUMER_ID	0xFFFF
 
 uma_zone_t	nvme_request_zone;
 int32_t		nvme_retry_count;
 
 MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
 
 static int    nvme_probe(device_t);
 static int    nvme_attach(device_t);
 static int    nvme_detach(device_t);
 static int    nvme_modevent(module_t mod, int type, void *arg);
 
 static devclass_t nvme_devclass;
 
 static device_method_t nvme_pci_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,     nvme_probe),
 	DEVMETHOD(device_attach,    nvme_attach),
 	DEVMETHOD(device_detach,    nvme_detach),
 	{ 0, 0 }
 };
 
 static driver_t nvme_pci_driver = {
 	"nvme",
 	nvme_pci_methods,
 	sizeof(struct nvme_controller),
 };
 
 DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, nvme_modevent, 0);
 MODULE_VERSION(nvme, 1);
 
 static struct _pcsid
 {
 	uint32_t	devid;
 	int		match_subdevice;
 	uint16_t	subdevice;
 	const char	*desc;
 } pci_ids[] = {
 	{ 0x01118086,		0, 0, "NVMe Controller"  },
 	{ IDT32_PCI_ID,		0, 0, "IDT NVMe Controller (32 channel)"  },
 	{ IDT8_PCI_ID,		0, 0, "IDT NVMe Controller (8 channel)" },
 	{ 0x09538086,		1, 0x3702, "DC P3700 SSD" },
 	{ 0x09538086,		1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
 	{ 0x09538086,		1, 0x3704, "DC P3500 SSD [Add-in Card]" },
 	{ 0x09538086,		1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
 	{ 0x09538086,		1, 0x3709, "DC P3600 SSD [Add-in Card]" },
 	{ 0x09538086,		1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
 	{ 0x00000000,		0, 0, NULL  }
 };
 
 static int
 nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
 {
 	if (devid != ep->devid)
 		return 0;
 
 	if (!ep->match_subdevice)
 		return 1;
 
 	if (subdevice == ep->subdevice)
 		return 1;
 	else
 		return 0;
 }
 
 static int
 nvme_probe (device_t device)
 {
 	struct _pcsid	*ep;
 	uint32_t	devid;
 	uint16_t	subdevice;
 
 	devid = pci_get_devid(device);
 	subdevice = pci_get_subdevice(device);
 	ep = pci_ids;
 
 	while (ep->devid) {
 		if (nvme_match(devid, subdevice, ep))
 			break;
 		++ep;
 	}
 
 	if (ep->desc) {
 		device_set_desc(device, ep->desc);
 		return (BUS_PROBE_DEFAULT);
 	}
 
 #if defined(PCIS_STORAGE_NVM)
 	if (pci_get_class(device)    == PCIC_STORAGE &&
 	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
 	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
 		device_set_desc(device, "Generic NVMe Device");
 		return (BUS_PROBE_GENERIC);
 	}
 #endif
 
 	return (ENXIO);
 }
 
 static void
 nvme_init(void)
 {
 	uint32_t	i;
 
 	nvme_request_zone = uma_zcreate("nvme_request",
 	    sizeof(struct nvme_request), NULL, NULL, NULL, NULL, 0, 0);
 
 	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
 		nvme_consumer[i].id = INVALID_CONSUMER_ID;
 }
 
 SYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL);
 
 static void
 nvme_uninit(void)
 {
 	uma_zdestroy(nvme_request_zone);
 }
 
 SYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
 
 static void
 nvme_load(void)
 {
 }
 
 static void
 nvme_unload(void)
 {
 }
 
 static void
 nvme_shutdown(void)
 {
 	device_t		*devlist;
 	struct nvme_controller	*ctrlr;
 	int			dev, devcount;
 
 	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
 		return;
 
 	for (dev = 0; dev < devcount; dev++) {
 		ctrlr = DEVICE2SOFTC(devlist[dev]);
 		nvme_ctrlr_shutdown(ctrlr);
 	}
 
 	free(devlist, M_TEMP);
 }
 
 static int
 nvme_modevent(module_t mod, int type, void *arg)
 {
 
 	switch (type) {
 	case MOD_LOAD:
 		nvme_load();
 		break;
 	case MOD_UNLOAD:
 		nvme_unload();
 		break;
 	case MOD_SHUTDOWN:
 		nvme_shutdown();
 		break;
 	default:
 		break;
 	}
 
 	return (0);
 }
 
 void
 nvme_dump_command(struct nvme_command *cmd)
 {
 	printf(
 "opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n",
 	    cmd->opc, cmd->fuse, cmd->rsvd1, cmd->cid, cmd->nsid,
 	    cmd->rsvd2, cmd->rsvd3,
 	    (uintmax_t)cmd->mptr, (uintmax_t)cmd->prp1, (uintmax_t)cmd->prp2,
 	    cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14,
 	    cmd->cdw15);
 }
 
 void
 nvme_dump_completion(struct nvme_completion *cpl)
 {
 	printf("cdw0:%08x sqhd:%04x sqid:%04x "
 	    "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n",
 	    cpl->cdw0, cpl->sqhd, cpl->sqid,
 	    cpl->cid, cpl->status.p, cpl->status.sc, cpl->status.sct,
 	    cpl->status.m, cpl->status.dnr);
 }
 
 static int
 nvme_attach(device_t dev)
 {
 	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
 	int			status;
 
 	status = nvme_ctrlr_construct(ctrlr, dev);
 
 	if (status != 0) {
 		nvme_ctrlr_destruct(ctrlr, dev);
 		return (status);
 	}
 
 	/*
 	 * Reset controller twice to ensure we do a transition from cc.en==1
 	 *  to cc.en==0.  This is because we don't really know what status
 	 *  the controller was left in when boot handed off to OS.
 	 */
 	status = nvme_ctrlr_hw_reset(ctrlr);
 	if (status != 0) {
 		nvme_ctrlr_destruct(ctrlr, dev);
 		return (status);
 	}
 
 	status = nvme_ctrlr_hw_reset(ctrlr);
 	if (status != 0) {
 		nvme_ctrlr_destruct(ctrlr, dev);
 		return (status);
 	}
 
-	nvme_sysctl_initialize_ctrlr(ctrlr);
-
 	pci_enable_busmaster(dev);
 
 	ctrlr->config_hook.ich_func = nvme_ctrlr_start_config_hook;
 	ctrlr->config_hook.ich_arg = ctrlr;
 
 	config_intrhook_establish(&ctrlr->config_hook);
 
 	return (0);
 }
 
 static int
 nvme_detach (device_t dev)
 {
 	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
 
 	nvme_ctrlr_destruct(ctrlr, dev);
 	pci_disable_busmaster(dev);
 	return (0);
 }
 
 static void
 nvme_notify(struct nvme_consumer *cons,
 	    struct nvme_controller *ctrlr)
 {
 	struct nvme_namespace	*ns;
 	void			*ctrlr_cookie;
 	int			cmpset, ns_idx;
 
 	/*
 	 * The consumer may register itself after the nvme devices
 	 *  have registered with the kernel, but before the
 	 *  driver has completed initialization.  In that case,
 	 *  return here, and when initialization completes, the
 	 *  controller will make sure the consumer gets notified.
 	 */
 	if (!ctrlr->is_initialized)
 		return;
 
 	cmpset = atomic_cmpset_32(&ctrlr->notification_sent, 0, 1);
 
 	if (cmpset == 0)
 		return;
 
 	if (cons->ctrlr_fn != NULL)
 		ctrlr_cookie = (*cons->ctrlr_fn)(ctrlr);
 	else
 		ctrlr_cookie = NULL;
 	ctrlr->cons_cookie[cons->id] = ctrlr_cookie;
 	if (ctrlr->is_failed) {
 		if (cons->fail_fn != NULL)
 			(*cons->fail_fn)(ctrlr_cookie);
 		/*
 		 * Do not notify consumers about the namespaces of a
 		 *  failed controller.
 		 */
 		return;
 	}
 	for (ns_idx = 0; ns_idx < ctrlr->cdata.nn; ns_idx++) {
 		ns = &ctrlr->ns[ns_idx];
 		if (cons->ns_fn != NULL)
 			ns->cons_cookie[cons->id] =
 			    (*cons->ns_fn)(ns, ctrlr_cookie);
 	}
 }
 
 void
 nvme_notify_new_controller(struct nvme_controller *ctrlr)
 {
 	int i;
 
 	for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
 		if (nvme_consumer[i].id != INVALID_CONSUMER_ID) {
 			nvme_notify(&nvme_consumer[i], ctrlr);
 		}
 	}
 }
 
 static void
 nvme_notify_new_consumer(struct nvme_consumer *cons)
 {
 	device_t		*devlist;
 	struct nvme_controller	*ctrlr;
 	int			dev_idx, devcount;
 
 	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
 		return;
 
 	for (dev_idx = 0; dev_idx < devcount; dev_idx++) {
 		ctrlr = DEVICE2SOFTC(devlist[dev_idx]);
 		nvme_notify(cons, ctrlr);
 	}
 
 	free(devlist, M_TEMP);
 }
 
 void
 nvme_notify_async_consumers(struct nvme_controller *ctrlr,
 			    const struct nvme_completion *async_cpl,
 			    uint32_t log_page_id, void *log_page_buffer,
 			    uint32_t log_page_size)
 {
 	struct nvme_consumer	*cons;
 	uint32_t		i;
 
 	for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
 		cons = &nvme_consumer[i];
 		if (cons->id != INVALID_CONSUMER_ID && cons->async_fn != NULL)
 			(*cons->async_fn)(ctrlr->cons_cookie[i], async_cpl,
 			    log_page_id, log_page_buffer, log_page_size);
 	}
 }
 
 void
 nvme_notify_fail_consumers(struct nvme_controller *ctrlr)
 {
 	struct nvme_consumer	*cons;
 	uint32_t		i;
 
 	/*
 	 * This controller failed during initialization (i.e. IDENTIFY
 	 *  command failed or timed out).  Do not notify any nvme
 	 *  consumers of the failure here, since the consumer does not
 	 *  even know about the controller yet.
 	 */
 	if (!ctrlr->is_initialized)
 		return;
 
 	for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
 		cons = &nvme_consumer[i];
 		if (cons->id != INVALID_CONSUMER_ID && cons->fail_fn != NULL)
 			cons->fail_fn(ctrlr->cons_cookie[i]);
 	}
 }
 
 struct nvme_consumer *
 nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn,
 		       nvme_cons_async_fn_t async_fn,
 		       nvme_cons_fail_fn_t fail_fn)
 {
 	int i;
 
 	/*
 	 * TODO: add locking around consumer registration.  Not an issue
 	 *  right now since we only have one nvme consumer - nvd(4).
 	 */
 	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
 		if (nvme_consumer[i].id == INVALID_CONSUMER_ID) {
 			nvme_consumer[i].id = i;
 			nvme_consumer[i].ns_fn = ns_fn;
 			nvme_consumer[i].ctrlr_fn = ctrlr_fn;
 			nvme_consumer[i].async_fn = async_fn;
 			nvme_consumer[i].fail_fn = fail_fn;
 
 			nvme_notify_new_consumer(&nvme_consumer[i]);
 			return (&nvme_consumer[i]);
 		}
 
 	printf("nvme(4): consumer not registered - no slots available\n");
 	return (NULL);
 }
 
 void
 nvme_unregister_consumer(struct nvme_consumer *consumer)
 {
 
 	consumer->id = INVALID_CONSUMER_ID;
 }
 
 void
 nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl)
 {
 	struct nvme_completion_poll_status	*status = arg;
 
 	/*
 	 * Copy status into the argument passed by the caller, so that
 	 *  the caller can check the status to determine if the
 	 *  the request passed or failed.
 	 */
 	memcpy(&status->cpl, cpl, sizeof(*cpl));
 	wmb();
 	status->done = TRUE;
 }
Index: projects/release-pkg/sys/dev/nvme/nvme_ctrlr.c
===================================================================
--- projects/release-pkg/sys/dev/nvme/nvme_ctrlr.c	(revision 293335)
+++ projects/release-pkg/sys/dev/nvme/nvme_ctrlr.c	(revision 293336)
@@ -1,1211 +1,1215 @@
 /*-
  * Copyright (C) 2012-2015 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
 #include <sys/ioccom.h>
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/uio.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 
 #include "nvme_private.h"
 
+/*
+ * Ceiling division, used for calculating the number of CPUs to assign to each
+ *  I/O queue and the number of I/O queues to allocate per controller.
+ */
+#define NVME_CEILING(num, div)	((((num) - 1) / (div)) + 1)
+
 static void nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
 						struct nvme_async_event_request *aer);
+static void nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr);
 
 static int
 nvme_ctrlr_allocate_bar(struct nvme_controller *ctrlr)
 {
 
 	ctrlr->resource_id = PCIR_BAR(0);
 
 	ctrlr->resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
 	    &ctrlr->resource_id, 0, ~0, 1, RF_ACTIVE);
 
 	if(ctrlr->resource == NULL) {
 		nvme_printf(ctrlr, "unable to allocate pci resource\n");
 		return (ENOMEM);
 	}
 
 	ctrlr->bus_tag = rman_get_bustag(ctrlr->resource);
 	ctrlr->bus_handle = rman_get_bushandle(ctrlr->resource);
 	ctrlr->regs = (struct nvme_registers *)ctrlr->bus_handle;
 
 	/*
 	 * The NVMe spec allows for the MSI-X table to be placed behind
 	 *  BAR 4/5, separate from the control/doorbell registers.  Always
 	 *  try to map this bar, because it must be mapped prior to calling
 	 *  pci_alloc_msix().  If the table isn't behind BAR 4/5,
 	 *  bus_alloc_resource() will just return NULL which is OK.
 	 */
 	ctrlr->bar4_resource_id = PCIR_BAR(4);
 	ctrlr->bar4_resource = bus_alloc_resource(ctrlr->dev, SYS_RES_MEMORY,
 	    &ctrlr->bar4_resource_id, 0, ~0, 1, RF_ACTIVE);
 
 	return (0);
 }
 
 static void
 nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
 {
 	struct nvme_qpair	*qpair;
 	uint32_t		num_entries;
 
 	qpair = &ctrlr->adminq;
 
 	num_entries = NVME_ADMIN_ENTRIES;
 	TUNABLE_INT_FETCH("hw.nvme.admin_entries", &num_entries);
 	/*
 	 * If admin_entries was overridden to an invalid value, revert it
 	 *  back to our default value.
 	 */
 	if (num_entries < NVME_MIN_ADMIN_ENTRIES ||
 	    num_entries > NVME_MAX_ADMIN_ENTRIES) {
 		nvme_printf(ctrlr, "invalid hw.nvme.admin_entries=%d "
 		    "specified\n", num_entries);
 		num_entries = NVME_ADMIN_ENTRIES;
 	}
 
 	/*
 	 * The admin queue's max xfer size is treated differently than the
 	 *  max I/O xfer size.  16KB is sufficient here - maybe even less?
 	 */
 	nvme_qpair_construct(qpair, 
 			     0, /* qpair ID */
 			     0, /* vector */
 			     num_entries,
 			     NVME_ADMIN_TRACKERS,
 			     ctrlr);
 }
 
 static int
 nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
 {
 	struct nvme_qpair	*qpair;
 	union cap_lo_register	cap_lo;
 	int			i, num_entries, num_trackers;
 
 	num_entries = NVME_IO_ENTRIES;
 	TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);
 
 	/*
 	 * NVMe spec sets a hard limit of 64K max entries, but
 	 *  devices may specify a smaller limit, so we need to check
 	 *  the MQES field in the capabilities register.
 	 */
 	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
 	num_entries = min(num_entries, cap_lo.bits.mqes+1);
 
 	num_trackers = NVME_IO_TRACKERS;
 	TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers);
 
 	num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS);
 	num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS);
 	/*
 	 * No need to have more trackers than entries in the submit queue.
 	 *  Note also that for a queue size of N, we can only have (N-1)
 	 *  commands outstanding, hence the "-1" here.
 	 */
 	num_trackers = min(num_trackers, (num_entries-1));
 
+	/*
+	 * This was calculated previously when setting up interrupts, but
+	 *  a controller could theoretically support fewer I/O queues than
+	 *  MSI-X vectors.  So calculate again here just to be safe.
+	 */
+	ctrlr->num_cpus_per_ioq = NVME_CEILING(mp_ncpus, ctrlr->num_io_queues);
+
 	ctrlr->ioq = malloc(ctrlr->num_io_queues * sizeof(struct nvme_qpair),
 	    M_NVME, M_ZERO | M_WAITOK);
 
 	for (i = 0; i < ctrlr->num_io_queues; i++) {
 		qpair = &ctrlr->ioq[i];
 
 		/*
 		 * Admin queue has ID=0. IO queues start at ID=1 -
 		 *  hence the 'i+1' here.
 		 *
 		 * For I/O queues, use the controller-wide max_xfer_size
 		 *  calculated in nvme_attach().
 		 */
 		nvme_qpair_construct(qpair,
 				     i+1, /* qpair ID */
 				     ctrlr->msix_enabled ? i+1 : 0, /* vector */
 				     num_entries,
 				     num_trackers,
 				     ctrlr);
 
-		if (ctrlr->per_cpu_io_queues)
-			bus_bind_intr(ctrlr->dev, qpair->res, i);
+		/*
+		 * Do not bother binding interrupts if we only have one I/O
+		 *  interrupt thread for this controller.
+		 */
+		if (ctrlr->num_io_queues > 1)
+			bus_bind_intr(ctrlr->dev, qpair->res,
+			    i * ctrlr->num_cpus_per_ioq);
 	}
 
 	return (0);
 }
 
 static void
 nvme_ctrlr_fail(struct nvme_controller *ctrlr)
 {
 	int i;
 
 	ctrlr->is_failed = TRUE;
 	nvme_qpair_fail(&ctrlr->adminq);
 	for (i = 0; i < ctrlr->num_io_queues; i++)
 		nvme_qpair_fail(&ctrlr->ioq[i]);
 	nvme_notify_fail_consumers(ctrlr);
 }
 
 void
 nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr,
     struct nvme_request *req)
 {
 
 	mtx_lock(&ctrlr->lock);
 	STAILQ_INSERT_TAIL(&ctrlr->fail_req, req, stailq);
 	mtx_unlock(&ctrlr->lock);
 	taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->fail_req_task);
 }
 
 static void
 nvme_ctrlr_fail_req_task(void *arg, int pending)
 {
 	struct nvme_controller	*ctrlr = arg;
 	struct nvme_request	*req;
 
 	mtx_lock(&ctrlr->lock);
 	while (!STAILQ_EMPTY(&ctrlr->fail_req)) {
 		req = STAILQ_FIRST(&ctrlr->fail_req);
 		STAILQ_REMOVE_HEAD(&ctrlr->fail_req, stailq);
 		nvme_qpair_manual_complete_request(req->qpair, req,
 		    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
 	}
 	mtx_unlock(&ctrlr->lock);
 }
 
 static int
 nvme_ctrlr_wait_for_ready(struct nvme_controller *ctrlr, int desired_val)
 {
 	int ms_waited;
 	union cc_register cc;
 	union csts_register csts;
 
 	cc.raw = nvme_mmio_read_4(ctrlr, cc);
 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
 
 	if (cc.bits.en != desired_val) {
 		nvme_printf(ctrlr, "%s called with desired_val = %d "
 		    "but cc.en = %d\n", __func__, desired_val, cc.bits.en);
 		return (ENXIO);
 	}
 
 	ms_waited = 0;
 
 	while (csts.bits.rdy != desired_val) {
 		DELAY(1000);
 		if (ms_waited++ > ctrlr->ready_timeout_in_ms) {
 			nvme_printf(ctrlr, "controller ready did not become %d "
 			    "within %d ms\n", desired_val, ctrlr->ready_timeout_in_ms);
 			return (ENXIO);
 		}
 		csts.raw = nvme_mmio_read_4(ctrlr, csts);
 	}
 
 	return (0);
 }
 
 static void
 nvme_ctrlr_disable(struct nvme_controller *ctrlr)
 {
 	union cc_register cc;
 	union csts_register csts;
 
 	cc.raw = nvme_mmio_read_4(ctrlr, cc);
 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
 
 	if (cc.bits.en == 1 && csts.bits.rdy == 0)
 		nvme_ctrlr_wait_for_ready(ctrlr, 1);
 
 	cc.bits.en = 0;
 	nvme_mmio_write_4(ctrlr, cc, cc.raw);
 	DELAY(5000);
 	nvme_ctrlr_wait_for_ready(ctrlr, 0);
 }
 
 static int
 nvme_ctrlr_enable(struct nvme_controller *ctrlr)
 {
 	union cc_register	cc;
 	union csts_register	csts;
 	union aqa_register	aqa;
 
 	cc.raw = nvme_mmio_read_4(ctrlr, cc);
 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
 
 	if (cc.bits.en == 1) {
 		if (csts.bits.rdy == 1)
 			return (0);
 		else
 			return (nvme_ctrlr_wait_for_ready(ctrlr, 1));
 	}
 
 	nvme_mmio_write_8(ctrlr, asq, ctrlr->adminq.cmd_bus_addr);
 	DELAY(5000);
 	nvme_mmio_write_8(ctrlr, acq, ctrlr->adminq.cpl_bus_addr);
 	DELAY(5000);
 
 	aqa.raw = 0;
 	/* acqs and asqs are 0-based. */
 	aqa.bits.acqs = ctrlr->adminq.num_entries-1;
 	aqa.bits.asqs = ctrlr->adminq.num_entries-1;
 	nvme_mmio_write_4(ctrlr, aqa, aqa.raw);
 	DELAY(5000);
 
 	cc.bits.en = 1;
 	cc.bits.css = 0;
 	cc.bits.ams = 0;
 	cc.bits.shn = 0;
 	cc.bits.iosqes = 6; /* SQ entry size == 64 == 2^6 */
 	cc.bits.iocqes = 4; /* CQ entry size == 16 == 2^4 */
 
 	/* This evaluates to 0, which is according to spec. */
 	cc.bits.mps = (PAGE_SIZE >> 13);
 
 	nvme_mmio_write_4(ctrlr, cc, cc.raw);
 	DELAY(5000);
 
 	return (nvme_ctrlr_wait_for_ready(ctrlr, 1));
 }
 
 int
 nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
 {
 	int i;
 
 	nvme_admin_qpair_disable(&ctrlr->adminq);
-	for (i = 0; i < ctrlr->num_io_queues; i++)
-		nvme_io_qpair_disable(&ctrlr->ioq[i]);
+	/*
+	 * I/O queues are not allocated before the initial HW
+	 *  reset, so do not try to disable them.  Use is_initialized
+	 *  to determine if this is the initial HW reset.
+	 */
+	if (ctrlr->is_initialized) {
+		for (i = 0; i < ctrlr->num_io_queues; i++)
+			nvme_io_qpair_disable(&ctrlr->ioq[i]);
+	}
 
 	DELAY(100*1000);
 
 	nvme_ctrlr_disable(ctrlr);
 	return (nvme_ctrlr_enable(ctrlr));
 }
 
 void
 nvme_ctrlr_reset(struct nvme_controller *ctrlr)
 {
 	int cmpset;
 
 	cmpset = atomic_cmpset_32(&ctrlr->is_resetting, 0, 1);
 
 	if (cmpset == 0 || ctrlr->is_failed)
 		/*
 		 * Controller is already resetting or has failed.  Return
 		 *  immediately since there is no need to kick off another
 		 *  reset in these cases.
 		 */
 		return;
 
 	taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->reset_task);
 }
 
 static int
 nvme_ctrlr_identify(struct nvme_controller *ctrlr)
 {
 	struct nvme_completion_poll_status	status;
 
 	status.done = FALSE;
 	nvme_ctrlr_cmd_identify_controller(ctrlr, &ctrlr->cdata,
 	    nvme_completion_poll_cb, &status);
 	while (status.done == FALSE)
 		pause("nvme", 1);
 	if (nvme_completion_is_error(&status.cpl)) {
 		nvme_printf(ctrlr, "nvme_identify_controller failed!\n");
 		return (ENXIO);
 	}
 
 	/*
 	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
 	 *  controller supports.
 	 */
 	if (ctrlr->cdata.mdts > 0)
 		ctrlr->max_xfer_size = min(ctrlr->max_xfer_size,
 		    ctrlr->min_page_size * (1 << (ctrlr->cdata.mdts)));
 
 	return (0);
 }
 
 static int
 nvme_ctrlr_set_num_qpairs(struct nvme_controller *ctrlr)
 {
 	struct nvme_completion_poll_status	status;
-	int					cq_allocated, i, sq_allocated;
+	int					cq_allocated, sq_allocated;
 
 	status.done = FALSE;
 	nvme_ctrlr_cmd_set_num_queues(ctrlr, ctrlr->num_io_queues,
 	    nvme_completion_poll_cb, &status);
 	while (status.done == FALSE)
 		pause("nvme", 1);
 	if (nvme_completion_is_error(&status.cpl)) {
 		nvme_printf(ctrlr, "nvme_set_num_queues failed!\n");
 		return (ENXIO);
 	}
 
 	/*
 	 * Data in cdw0 is 0-based.
 	 * Lower 16-bits indicate number of submission queues allocated.
 	 * Upper 16-bits indicate number of completion queues allocated.
 	 */
 	sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
 	cq_allocated = (status.cpl.cdw0 >> 16) + 1;
 
 	/*
-	 * Check that the controller was able to allocate the number of
-	 *  queues we requested.  If not, revert to one IO queue pair.
+	 * Controller may allocate more queues than we requested,
+	 *  so use the minimum of the number requested and what was
+	 *  actually allocated.
 	 */
-	if (sq_allocated < ctrlr->num_io_queues ||
-	    cq_allocated < ctrlr->num_io_queues) {
+	ctrlr->num_io_queues = min(ctrlr->num_io_queues, sq_allocated);
+	ctrlr->num_io_queues = min(ctrlr->num_io_queues, cq_allocated);
 
-		/*
-		 * Destroy extra IO queue pairs that were created at
-		 *  controller construction time but are no longer
-		 *  needed.  This will only happen when a controller
-		 *  supports fewer queues than MSI-X vectors.  This
-		 *  is not the normal case, but does occur with the
-		 *  Chatham prototype board.
-		 */
-		for (i = 1; i < ctrlr->num_io_queues; i++)
-			nvme_io_qpair_destroy(&ctrlr->ioq[i]);
-
-		ctrlr->num_io_queues = 1;
-		ctrlr->per_cpu_io_queues = 0;
-	}
-
 	return (0);
 }
 
 static int
 nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr)
 {
 	struct nvme_completion_poll_status	status;
 	struct nvme_qpair			*qpair;
 	int					i;
 
 	for (i = 0; i < ctrlr->num_io_queues; i++) {
 		qpair = &ctrlr->ioq[i];
 
 		status.done = FALSE;
 		nvme_ctrlr_cmd_create_io_cq(ctrlr, qpair, qpair->vector,
 		    nvme_completion_poll_cb, &status);
 		while (status.done == FALSE)
 			pause("nvme", 1);
 		if (nvme_completion_is_error(&status.cpl)) {
 			nvme_printf(ctrlr, "nvme_create_io_cq failed!\n");
 			return (ENXIO);
 		}
 
 		status.done = FALSE;
 		nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
 		    nvme_completion_poll_cb, &status);
 		while (status.done == FALSE)
 			pause("nvme", 1);
 		if (nvme_completion_is_error(&status.cpl)) {
 			nvme_printf(ctrlr, "nvme_create_io_sq failed!\n");
 			return (ENXIO);
 		}
 	}
 
 	return (0);
 }
 
 static int
 nvme_ctrlr_construct_namespaces(struct nvme_controller *ctrlr)
 {
 	struct nvme_namespace	*ns;
 	int			i, status;
 
 	for (i = 0; i < ctrlr->cdata.nn; i++) {
 		ns = &ctrlr->ns[i];
 		status = nvme_ns_construct(ns, i+1, ctrlr);
 		if (status != 0)
 			return (status);
 	}
 
 	return (0);
 }
 
 static boolean_t
 is_log_page_id_valid(uint8_t page_id)
 {
 
 	switch (page_id) {
 	case NVME_LOG_ERROR:
 	case NVME_LOG_HEALTH_INFORMATION:
 	case NVME_LOG_FIRMWARE_SLOT:
 		return (TRUE);
 	}
 
 	return (FALSE);
 }
 
 static uint32_t
 nvme_ctrlr_get_log_page_size(struct nvme_controller *ctrlr, uint8_t page_id)
 {
 	uint32_t	log_page_size;
 
 	switch (page_id) {
 	case NVME_LOG_ERROR:
 		log_page_size = min(
 		    sizeof(struct nvme_error_information_entry) *
 		    ctrlr->cdata.elpe,
 		    NVME_MAX_AER_LOG_SIZE);
 		break;
 	case NVME_LOG_HEALTH_INFORMATION:
 		log_page_size = sizeof(struct nvme_health_information_page);
 		break;
 	case NVME_LOG_FIRMWARE_SLOT:
 		log_page_size = sizeof(struct nvme_firmware_page);
 		break;
 	default:
 		log_page_size = 0;
 		break;
 	}
 
 	return (log_page_size);
 }
 
 static void
 nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
     union nvme_critical_warning_state state)
 {
 
 	if (state.bits.available_spare == 1)
 		nvme_printf(ctrlr, "available spare space below threshold\n");
 
 	if (state.bits.temperature == 1)
 		nvme_printf(ctrlr, "temperature above threshold\n");
 
 	if (state.bits.device_reliability == 1)
 		nvme_printf(ctrlr, "device reliability degraded\n");
 
 	if (state.bits.read_only == 1)
 		nvme_printf(ctrlr, "media placed in read only mode\n");
 
 	if (state.bits.volatile_memory_backup == 1)
 		nvme_printf(ctrlr, "volatile memory backup device failed\n");
 
 	if (state.bits.reserved != 0)
 		nvme_printf(ctrlr,
 		    "unknown critical warning(s): state = 0x%02x\n", state.raw);
 }
 
 static void
 nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
 {
 	struct nvme_async_event_request		*aer = arg;
 	struct nvme_health_information_page	*health_info;
 
 	/*
 	 * If the log page fetch for some reason completed with an error,
 	 *  don't pass log page data to the consumers.  In practice, this case
 	 *  should never happen.
 	 */
 	if (nvme_completion_is_error(cpl))
 		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
 		    aer->log_page_id, NULL, 0);
 	else {
 		if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
 			health_info = (struct nvme_health_information_page *)
 			    aer->log_page_buffer;
 			nvme_ctrlr_log_critical_warnings(aer->ctrlr,
 			    health_info->critical_warning);
 			/*
 			 * Critical warnings reported through the
 			 *  SMART/health log page are persistent, so
 			 *  clear the associated bits in the async event
 			 *  config so that we do not receive repeated
 			 *  notifications for the same event.
 			 */
 			aer->ctrlr->async_event_config.raw &=
 			    ~health_info->critical_warning.raw;
 			nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
 			    aer->ctrlr->async_event_config, NULL, NULL);
 		}
 
 
 		/*
 		 * Pass the cpl data from the original async event completion,
 		 *  not the log page fetch.
 		 */
 		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
 		    aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
 	}
 
 	/*
 	 * Repost another asynchronous event request to replace the one
 	 *  that just completed.
 	 */
 	nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
 }
 
 static void
 nvme_ctrlr_async_event_cb(void *arg, const struct nvme_completion *cpl)
 {
 	struct nvme_async_event_request	*aer = arg;
 
 	if (nvme_completion_is_error(cpl)) {
 		/*
 		 *  Do not retry failed async event requests.  This avoids
 		 *  infinite loops where a new async event request is submitted
 		 *  to replace the one just failed, only to fail again and
 		 *  perpetuate the loop.
 		 */
 		return;
 	}
 
 	/* Associated log page is in bits 23:16 of completion entry dw0. */
 	aer->log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
 
 	nvme_printf(aer->ctrlr, "async event occurred (log page id=0x%x)\n",
 	    aer->log_page_id);
 
 	if (is_log_page_id_valid(aer->log_page_id)) {
 		aer->log_page_size = nvme_ctrlr_get_log_page_size(aer->ctrlr,
 		    aer->log_page_id);
 		memcpy(&aer->cpl, cpl, sizeof(*cpl));
 		nvme_ctrlr_cmd_get_log_page(aer->ctrlr, aer->log_page_id,
 		    NVME_GLOBAL_NAMESPACE_TAG, aer->log_page_buffer,
 		    aer->log_page_size, nvme_ctrlr_async_event_log_page_cb,
 		    aer);
 		/* Wait to notify consumers until after log page is fetched. */
 	} else {
 		nvme_notify_async_consumers(aer->ctrlr, cpl, aer->log_page_id,
 		    NULL, 0);
 
 		/*
 		 * Repost another asynchronous event request to replace the one
 		 *  that just completed.
 		 */
 		nvme_ctrlr_construct_and_submit_aer(aer->ctrlr, aer);
 	}
 }
 
 static void
 nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
     struct nvme_async_event_request *aer)
 {
 	struct nvme_request *req;
 
 	aer->ctrlr = ctrlr;
 	req = nvme_allocate_request_null(nvme_ctrlr_async_event_cb, aer);
 	aer->req = req;
 
 	/*
 	 * Disable timeout here, since asynchronous event requests should by
 	 *  nature never be timed out.
 	 */
 	req->timeout = FALSE;
 	req->cmd.opc = NVME_OPC_ASYNC_EVENT_REQUEST;
 	nvme_ctrlr_submit_admin_request(ctrlr, req);
 }
 
 static void
 nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
 {
 	struct nvme_completion_poll_status	status;
 	struct nvme_async_event_request		*aer;
 	uint32_t				i;
 
 	ctrlr->async_event_config.raw = 0xFF;
 	ctrlr->async_event_config.bits.reserved = 0;
 
 	status.done = FALSE;
 	nvme_ctrlr_cmd_get_feature(ctrlr, NVME_FEAT_TEMPERATURE_THRESHOLD,
 	    0, NULL, 0, nvme_completion_poll_cb, &status);
 	while (status.done == FALSE)
 		pause("nvme", 1);
 	if (nvme_completion_is_error(&status.cpl) ||
 	    (status.cpl.cdw0 & 0xFFFF) == 0xFFFF ||
 	    (status.cpl.cdw0 & 0xFFFF) == 0x0000) {
 		nvme_printf(ctrlr, "temperature threshold not supported\n");
 		ctrlr->async_event_config.bits.temperature = 0;
 	}
 
 	nvme_ctrlr_cmd_set_async_event_config(ctrlr,
 	    ctrlr->async_event_config, NULL, NULL);
 
 	/* aerl is a zero-based value, so we need to add 1 here. */
 	ctrlr->num_aers = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));
 
 	for (i = 0; i < ctrlr->num_aers; i++) {
 		aer = &ctrlr->aer[i];
 		nvme_ctrlr_construct_and_submit_aer(ctrlr, aer);
 	}
 }
 
 static void
 nvme_ctrlr_configure_int_coalescing(struct nvme_controller *ctrlr)
 {
 
 	ctrlr->int_coal_time = 0;
 	TUNABLE_INT_FETCH("hw.nvme.int_coal_time",
 	    &ctrlr->int_coal_time);
 
 	ctrlr->int_coal_threshold = 0;
 	TUNABLE_INT_FETCH("hw.nvme.int_coal_threshold",
 	    &ctrlr->int_coal_threshold);
 
 	nvme_ctrlr_cmd_set_interrupt_coalescing(ctrlr, ctrlr->int_coal_time,
 	    ctrlr->int_coal_threshold, NULL, NULL);
 }
 
 static void
 nvme_ctrlr_start(void *ctrlr_arg)
 {
 	struct nvme_controller *ctrlr = ctrlr_arg;
+	uint32_t old_num_io_queues;
 	int i;
 
-	nvme_qpair_reset(&ctrlr->adminq);
+	/*
+	 * Only reset adminq here when we are restarting the
+	 *  controller after a reset.  During initialization,
+	 *  we have already submitted admin commands to get
+	 *  the number of I/O queues supported, so cannot reset
+	 *  the adminq again here.
+	 */
+	if (ctrlr->is_resetting) {
+		nvme_qpair_reset(&ctrlr->adminq);
+	}
+
 	for (i = 0; i < ctrlr->num_io_queues; i++)
 		nvme_qpair_reset(&ctrlr->ioq[i]);
 
 	nvme_admin_qpair_enable(&ctrlr->adminq);
 
 	if (nvme_ctrlr_identify(ctrlr) != 0) {
 		nvme_ctrlr_fail(ctrlr);
 		return;
 	}
 
+	/*
+	 * The number of qpairs is determined during controller initialization,
+	 *  including using NVMe SET_FEATURES/NUMBER_OF_QUEUES to determine the
+	 *  HW limit.  We call SET_FEATURES again here so that it gets called
+	 *  after any reset for controllers that depend on the driver to
+	 *  explicitly specify how many queues it will use.  This value should
+	 *  never change between resets, so panic if somehow that does happen.
+	 */
+	old_num_io_queues = ctrlr->num_io_queues;
 	if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) {
 		nvme_ctrlr_fail(ctrlr);
 		return;
 	}
 
+	if (old_num_io_queues != ctrlr->num_io_queues) {
+		panic("num_io_queues changed from %u to %u", old_num_io_queues,
+		    ctrlr->num_io_queues);
+	}
+
 	if (nvme_ctrlr_create_qpairs(ctrlr) != 0) {
 		nvme_ctrlr_fail(ctrlr);
 		return;
 	}
 
 	if (nvme_ctrlr_construct_namespaces(ctrlr) != 0) {
 		nvme_ctrlr_fail(ctrlr);
 		return;
 	}
 
 	nvme_ctrlr_configure_aer(ctrlr);
 	nvme_ctrlr_configure_int_coalescing(ctrlr);
 
 	for (i = 0; i < ctrlr->num_io_queues; i++)
 		nvme_io_qpair_enable(&ctrlr->ioq[i]);
 }
 
 void
 nvme_ctrlr_start_config_hook(void *arg)
 {
 	struct nvme_controller *ctrlr = arg;
 
-	nvme_ctrlr_start(ctrlr);
+	nvme_qpair_reset(&ctrlr->adminq);
+	nvme_admin_qpair_enable(&ctrlr->adminq);
+
+	if (nvme_ctrlr_set_num_qpairs(ctrlr) == 0 &&
+	    nvme_ctrlr_construct_io_qpairs(ctrlr) == 0)
+		nvme_ctrlr_start(ctrlr);
+	else
+		nvme_ctrlr_fail(ctrlr);
+
+	nvme_sysctl_initialize_ctrlr(ctrlr);
 	config_intrhook_disestablish(&ctrlr->config_hook);
 
 	ctrlr->is_initialized = 1;
 	nvme_notify_new_controller(ctrlr);
 }
 
 static void
 nvme_ctrlr_reset_task(void *arg, int pending)
 {
 	struct nvme_controller	*ctrlr = arg;
 	int			status;
 
 	nvme_printf(ctrlr, "resetting controller\n");
 	status = nvme_ctrlr_hw_reset(ctrlr);
 	/*
 	 * Use pause instead of DELAY, so that we yield to any nvme interrupt
 	 *  handlers on this CPU that were blocked on a qpair lock. We want
 	 *  all nvme interrupts completed before proceeding with restarting the
 	 *  controller.
 	 *
 	 * XXX - any way to guarantee the interrupt handlers have quiesced?
 	 */
 	pause("nvmereset", hz / 10);
 	if (status == 0)
 		nvme_ctrlr_start(ctrlr);
 	else
 		nvme_ctrlr_fail(ctrlr);
 
 	atomic_cmpset_32(&ctrlr->is_resetting, 1, 0);
 }
 
 static void
 nvme_ctrlr_intx_handler(void *arg)
 {
 	struct nvme_controller *ctrlr = arg;
 
 	nvme_mmio_write_4(ctrlr, intms, 1);
 
 	nvme_qpair_process_completions(&ctrlr->adminq);
 
 	if (ctrlr->ioq[0].cpl)
 		nvme_qpair_process_completions(&ctrlr->ioq[0]);
 
 	nvme_mmio_write_4(ctrlr, intmc, 1);
 }
 
 static int
 nvme_ctrlr_configure_intx(struct nvme_controller *ctrlr)
 {
 
+	ctrlr->msix_enabled = 0;
 	ctrlr->num_io_queues = 1;
-	ctrlr->per_cpu_io_queues = 0;
+	ctrlr->num_cpus_per_ioq = mp_ncpus;
 	ctrlr->rid = 0;
 	ctrlr->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
 	    &ctrlr->rid, RF_SHAREABLE | RF_ACTIVE);
 
 	if (ctrlr->res == NULL) {
 		nvme_printf(ctrlr, "unable to allocate shared IRQ\n");
 		return (ENOMEM);
 	}
 
 	bus_setup_intr(ctrlr->dev, ctrlr->res,
 	    INTR_TYPE_MISC | INTR_MPSAFE, NULL, nvme_ctrlr_intx_handler,
 	    ctrlr, &ctrlr->tag);
 
 	if (ctrlr->tag == NULL) {
 		nvme_printf(ctrlr, "unable to setup intx handler\n");
 		return (ENOMEM);
 	}
 
 	return (0);
 }
 
 static void
 nvme_pt_done(void *arg, const struct nvme_completion *cpl)
 {
 	struct nvme_pt_command *pt = arg;
 
 	bzero(&pt->cpl, sizeof(pt->cpl));
 	pt->cpl.cdw0 = cpl->cdw0;
 	pt->cpl.status = cpl->status;
 	pt->cpl.status.p = 0;
 
 	mtx_lock(pt->driver_lock);
 	wakeup(pt);
 	mtx_unlock(pt->driver_lock);
 }
 
 int
 nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
     struct nvme_pt_command *pt, uint32_t nsid, int is_user_buffer,
     int is_admin_cmd)
 {
 	struct nvme_request	*req;
 	struct mtx		*mtx;
 	struct buf		*buf = NULL;
 	int			ret = 0;
 
 	if (pt->len > 0) {
 		if (pt->len > ctrlr->max_xfer_size) {
 			nvme_printf(ctrlr, "pt->len (%d) "
 			    "exceeds max_xfer_size (%d)\n", pt->len,
 			    ctrlr->max_xfer_size);
 			return EIO;
 		}
 		if (is_user_buffer) {
 			/*
 			 * Ensure the user buffer is wired for the duration of
 			 *  this passthrough command.
 			 */
 			PHOLD(curproc);
 			buf = getpbuf(NULL);
 			buf->b_data = pt->buf;
 			buf->b_bufsize = pt->len;
 			buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE;
 #ifdef NVME_UNMAPPED_BIO_SUPPORT
 			if (vmapbuf(buf, 1) < 0) {
 #else
 			if (vmapbuf(buf) < 0) {
 #endif
 				ret = EFAULT;
 				goto err;
 			}
 			req = nvme_allocate_request_vaddr(buf->b_data, pt->len, 
 			    nvme_pt_done, pt);
 		} else
 			req = nvme_allocate_request_vaddr(pt->buf, pt->len,
 			    nvme_pt_done, pt);
 	} else
 		req = nvme_allocate_request_null(nvme_pt_done, pt);
 
 	req->cmd.opc	= pt->cmd.opc;
 	req->cmd.cdw10	= pt->cmd.cdw10;
 	req->cmd.cdw11	= pt->cmd.cdw11;
 	req->cmd.cdw12	= pt->cmd.cdw12;
 	req->cmd.cdw13	= pt->cmd.cdw13;
 	req->cmd.cdw14	= pt->cmd.cdw14;
 	req->cmd.cdw15	= pt->cmd.cdw15;
 
 	req->cmd.nsid = nsid;
 
 	if (is_admin_cmd)
 		mtx = &ctrlr->lock;
 	else
 		mtx = &ctrlr->ns[nsid-1].lock;
 
 	mtx_lock(mtx);
 	pt->driver_lock = mtx;
 
 	if (is_admin_cmd)
 		nvme_ctrlr_submit_admin_request(ctrlr, req);
 	else
 		nvme_ctrlr_submit_io_request(ctrlr, req);
 
 	mtx_sleep(pt, mtx, PRIBIO, "nvme_pt", 0);
 	mtx_unlock(mtx);
 
 	pt->driver_lock = NULL;
 
 err:
 	if (buf != NULL) {
 		relpbuf(buf, NULL);
 		PRELE(curproc);
 	}
 
 	return (ret);
 }
 
 static int
 nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
     struct thread *td)
 {
 	struct nvme_controller			*ctrlr;
 	struct nvme_pt_command			*pt;
 
 	ctrlr = cdev->si_drv1;
 
 	switch (cmd) {
 	case NVME_RESET_CONTROLLER:
 		nvme_ctrlr_reset(ctrlr);
 		break;
 	case NVME_PASSTHROUGH_CMD:
 		pt = (struct nvme_pt_command *)arg;
 		return (nvme_ctrlr_passthrough_cmd(ctrlr, pt, pt->cmd.nsid,
 		    1 /* is_user_buffer */, 1 /* is_admin_cmd */));
 	default:
 		return (ENOTTY);
 	}
 
 	return (0);
 }
 
 static struct cdevsw nvme_ctrlr_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	0,
 	.d_ioctl =	nvme_ctrlr_ioctl
 };
 
+/*
+ * Choose MSI-X or INTx and size the I/O queues (num_cpus_per_ioq,
+ *  num_io_queues, msix_enabled); revert to INTx if forced or MSI-X fails.
+ */
+static void
+nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
+{
+	device_t	dev;
+	int		per_cpu_io_queues;
+	int		num_vectors_requested, num_vectors_allocated;
+	int		num_vectors_available;
+
+	dev = ctrlr->dev;
+	per_cpu_io_queues = 1;
+	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
+	ctrlr->force_intx = 0;
+	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
+
+	/*
+	 * FreeBSD currently cannot allocate more than about 190 vectors at
+	 *  boot, meaning that systems with high core count and many devices
+	 *  requesting per-CPU interrupt vectors will not get their full
+	 *  allotment.  So first, try to allocate as many as we may need to
+	 *  understand what is available, then immediately release them.
+	 *  Then figure out how many of those we will actually use, based on
+	 *  assigning an equal number of cores to each I/O queue.
+	 */
+	/* One vector per core I/O queue, plus one vector for admin queue. */
+	num_vectors_available = min(pci_msix_count(dev), mp_ncpus + 1);
+	if (pci_alloc_msix(dev, &num_vectors_available) != 0)
+		num_vectors_available = 0;
+	pci_release_msi(dev);
+
+	if (ctrlr->force_intx || num_vectors_available < 2) {
+		nvme_ctrlr_configure_intx(ctrlr);
+		return;
+	}
+
+	/* One vector is reserved for the admin queue; the rest serve I/O. */
+	if (per_cpu_io_queues)
+		ctrlr->num_cpus_per_ioq = NVME_CEILING(mp_ncpus, num_vectors_available - 1);
+	else
+		ctrlr->num_cpus_per_ioq = mp_ncpus;
+
+	ctrlr->num_io_queues = NVME_CEILING(mp_ncpus, ctrlr->num_cpus_per_ioq);
+	num_vectors_requested = ctrlr->num_io_queues + 1;
+	num_vectors_allocated = num_vectors_requested;
+
+	/*
+	 * Allocate only the vectors we need.  The probe above obtained at
+	 *  least this many, but if anything goes wrong revert to INTx.
+	 */
+	if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
+		nvme_ctrlr_configure_intx(ctrlr);
+		return;
+	}
+
+	if (num_vectors_allocated < num_vectors_requested) {
+		pci_release_msi(dev);
+		nvme_ctrlr_configure_intx(ctrlr);
+		return;
+	}
+
+	ctrlr->msix_enabled = 1;
+}
+
 int
 nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
 {
 	union cap_lo_register	cap_lo;
 	union cap_hi_register	cap_hi;
-	int			i, per_cpu_io_queues, rid;
-	int			num_vectors_requested, num_vectors_allocated;
 	int			status, timeout_period;
 
 	ctrlr->dev = dev;
 
 	mtx_init(&ctrlr->lock, "nvme ctrlr lock", NULL, MTX_DEF);
 
 	status = nvme_ctrlr_allocate_bar(ctrlr);
 
 	if (status != 0)
 		return (status);
 
 	/*
 	 * Software emulators may set the doorbell stride to something
 	 *  other than zero, but this driver is not set up to handle that.
 	 */
 	cap_hi.raw = nvme_mmio_read_4(ctrlr, cap_hi);
 	if (cap_hi.bits.dstrd != 0)
 		return (ENXIO);
 
 	ctrlr->min_page_size = 1 << (12 + cap_hi.bits.mpsmin);
 
 	/* Get ready timeout value from controller, in units of 500ms. */
 	cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
 	ctrlr->ready_timeout_in_ms = cap_lo.bits.to * 500;
 
 	timeout_period = NVME_DEFAULT_TIMEOUT_PERIOD;
 	TUNABLE_INT_FETCH("hw.nvme.timeout_period", &timeout_period);
 	timeout_period = min(timeout_period, NVME_MAX_TIMEOUT_PERIOD);
 	timeout_period = max(timeout_period, NVME_MIN_TIMEOUT_PERIOD);
 	ctrlr->timeout_period = timeout_period;
 
 	nvme_retry_count = NVME_DEFAULT_RETRY_COUNT;
 	TUNABLE_INT_FETCH("hw.nvme.retry_count", &nvme_retry_count);
 
-	per_cpu_io_queues = 1;
-	TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
-	ctrlr->per_cpu_io_queues = per_cpu_io_queues ? TRUE : FALSE;
-
-	if (ctrlr->per_cpu_io_queues)
-		ctrlr->num_io_queues = mp_ncpus;
-	else
-		ctrlr->num_io_queues = 1;
-
-	ctrlr->force_intx = 0;
-	TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
-
 	ctrlr->enable_aborts = 0;
 	TUNABLE_INT_FETCH("hw.nvme.enable_aborts", &ctrlr->enable_aborts);
 
-	ctrlr->msix_enabled = 1;
+	nvme_ctrlr_setup_interrupts(ctrlr);
 
-	if (ctrlr->force_intx) {
-		ctrlr->msix_enabled = 0;
-		goto intx;
-	}
-
-	/* One vector per IO queue, plus one vector for admin queue. */
-	num_vectors_requested = ctrlr->num_io_queues + 1;
-
-	/*
-	 * If we cannot even allocate 2 vectors (one for admin, one for
-	 *  I/O), then revert to INTx.
-	 */
-	if (pci_msix_count(dev) < 2) {
-		ctrlr->msix_enabled = 0;
-		goto intx;
-	} else if (pci_msix_count(dev) < num_vectors_requested) {
-		ctrlr->per_cpu_io_queues = FALSE;
-		ctrlr->num_io_queues = 1;
-		num_vectors_requested = 2; /* one for admin, one for I/O */
-	}
-
-	num_vectors_allocated = num_vectors_requested;
-	if (pci_alloc_msix(dev, &num_vectors_allocated) != 0) {
-		ctrlr->msix_enabled = 0;
-		goto intx;
-	} else if (num_vectors_allocated < num_vectors_requested) {
-		if (num_vectors_allocated < 2) {
-			pci_release_msi(dev);
-			ctrlr->msix_enabled = 0;
-			goto intx;
-		} else {
-			ctrlr->per_cpu_io_queues = FALSE;
-			ctrlr->num_io_queues = 1;
-			/*
-			 * Release whatever vectors were allocated, and just
-			 *  reallocate the two needed for the admin and single
-			 *  I/O qpair.
-			 */
-			num_vectors_allocated = 2;
-			pci_release_msi(dev);
-			if (pci_alloc_msix(dev, &num_vectors_allocated) != 0)
-				panic("could not reallocate any vectors\n");
-			if (num_vectors_allocated != 2)
-				panic("could not reallocate 2 vectors\n");
-		}
-	}
-
-	/*
-	 * On earlier FreeBSD releases, there are reports that
-	 *  pci_alloc_msix() can return successfully with all vectors
-	 *  requested, but a subsequent bus_alloc_resource_any()
-	 *  for one of those vectors fails.  This issue occurs more
-	 *  readily with multiple devices using per-CPU vectors.
-	 * To workaround this issue, try to allocate the resources now,
-	 *  and fall back to INTx if we cannot allocate all of them.
-	 *  This issue cannot be reproduced on more recent versions of
-	 *  FreeBSD which have increased the maximum number of MSI-X
-	 *  vectors, but adding the workaround makes it easier for
-	 *  vendors wishing to import this driver into kernels based on
-	 *  older versions of FreeBSD.
-	 */
-	for (i = 0; i < num_vectors_allocated; i++) {
-		rid = i + 1;
-		ctrlr->msi_res[i] = bus_alloc_resource_any(ctrlr->dev,
-		    SYS_RES_IRQ, &rid, RF_ACTIVE);
-
-		if (ctrlr->msi_res[i] == NULL) {
-			ctrlr->msix_enabled = 0;
-			while (i > 0) {
-				i--;
-				bus_release_resource(ctrlr->dev,
-				    SYS_RES_IRQ,
-				    rman_get_rid(ctrlr->msi_res[i]),
-				    ctrlr->msi_res[i]);
-			}
-			pci_release_msi(dev);
-			nvme_printf(ctrlr, "could not obtain all MSI-X "
-			    "resources, reverting to intx\n");
-			break;
-		}
-	}
-
-intx:
-
-	if (!ctrlr->msix_enabled)
-		nvme_ctrlr_configure_intx(ctrlr);
-
 	ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
 	nvme_ctrlr_construct_admin_qpair(ctrlr);
-	status = nvme_ctrlr_construct_io_qpairs(ctrlr);
 
-	if (status != 0)
-		return (status);
-
 	ctrlr->cdev = make_dev(&nvme_ctrlr_cdevsw, device_get_unit(dev),
 	    UID_ROOT, GID_WHEEL, 0600, "nvme%d", device_get_unit(dev));
 
 	if (ctrlr->cdev == NULL)
 		return (ENXIO);
 
 	ctrlr->cdev->si_drv1 = (void *)ctrlr;
 
 	ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK,
 	    taskqueue_thread_enqueue, &ctrlr->taskqueue);
 	taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq");
 
 	ctrlr->is_resetting = 0;
 	ctrlr->is_initialized = 0;
 	ctrlr->notification_sent = 0;
 	TASK_INIT(&ctrlr->reset_task, 0, nvme_ctrlr_reset_task, ctrlr);
 
 	TASK_INIT(&ctrlr->fail_req_task, 0, nvme_ctrlr_fail_req_task, ctrlr);
 	STAILQ_INIT(&ctrlr->fail_req);
 	ctrlr->is_failed = FALSE;
 
 	return (0);
 }
 
 void
 nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
 {
 	int				i;
 
 	/*
 	 *  Notify the controller of a shutdown, even though this is due to
 	 *   a driver unload, not a system shutdown (this path is not invoked
 	 *   during shutdown).  This ensures the controller receives a
 	 *   shutdown notification in case the system is shutdown before
 	 *   reloading the driver.
 	 */
 	nvme_ctrlr_shutdown(ctrlr);
 
 	nvme_ctrlr_disable(ctrlr);
 	taskqueue_free(ctrlr->taskqueue);
 
 	for (i = 0; i < NVME_MAX_NAMESPACES; i++)
 		nvme_ns_destruct(&ctrlr->ns[i]);
 
 	if (ctrlr->cdev)
 		destroy_dev(ctrlr->cdev);
 
 	for (i = 0; i < ctrlr->num_io_queues; i++) {
 		nvme_io_qpair_destroy(&ctrlr->ioq[i]);
 	}
 
 	free(ctrlr->ioq, M_NVME);
 
 	nvme_admin_qpair_destroy(&ctrlr->adminq);
 
 	if (ctrlr->resource != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    ctrlr->resource_id, ctrlr->resource);
 	}
 
 	if (ctrlr->bar4_resource != NULL) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 		    ctrlr->bar4_resource_id, ctrlr->bar4_resource);
 	}
 
 	if (ctrlr->tag)
 		bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
 
 	if (ctrlr->res)
 		bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
 		    rman_get_rid(ctrlr->res), ctrlr->res);
 
 	if (ctrlr->msix_enabled)
 		pci_release_msi(dev);
 }
 
 void
 nvme_ctrlr_shutdown(struct nvme_controller *ctrlr)
 {
 	union cc_register	cc;
 	union csts_register	csts;
 	int			ticks = 0;
 
 	cc.raw = nvme_mmio_read_4(ctrlr, cc);
 	cc.bits.shn = NVME_SHN_NORMAL;
 	nvme_mmio_write_4(ctrlr, cc, cc.raw);
 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
 	while ((csts.bits.shst != NVME_SHST_COMPLETE) && (ticks++ < 5*hz)) {
 		pause("nvme shn", 1);
 		csts.raw = nvme_mmio_read_4(ctrlr, csts);
 	}
 	if (csts.bits.shst != NVME_SHST_COMPLETE)
 		nvme_printf(ctrlr, "did not complete shutdown within 5 seconds "
 		    "of notification\n");
 }
 
 void
 nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
     struct nvme_request *req)
 {
 
 	nvme_qpair_submit_request(&ctrlr->adminq, req);
 }
 
 void
 nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
     struct nvme_request *req)
 {
 	struct nvme_qpair       *qpair;
 
-	if (ctrlr->per_cpu_io_queues)
-		qpair = &ctrlr->ioq[curcpu];
-	else
-		qpair = &ctrlr->ioq[0];
-
+	qpair = &ctrlr->ioq[curcpu / ctrlr->num_cpus_per_ioq];
 	nvme_qpair_submit_request(qpair, req);
 }
 
 device_t
 nvme_ctrlr_get_device(struct nvme_controller *ctrlr)
 {
 
 	return (ctrlr->dev);
 }
 
 const struct nvme_controller_data *
 nvme_ctrlr_get_data(struct nvme_controller *ctrlr)
 {
 
 	return (&ctrlr->cdata);
 }
Index: projects/release-pkg/sys/dev/nvme/nvme_private.h
===================================================================
--- projects/release-pkg/sys/dev/nvme/nvme_private.h	(revision 293335)
+++ projects/release-pkg/sys/dev/nvme/nvme_private.h	(revision 293336)
@@ -1,533 +1,531 @@
 /*-
  * Copyright (C) 2012-2014 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef __NVME_PRIVATE_H__
 #define __NVME_PRIVATE_H__
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/bus.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/rman.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 
 #include <vm/uma.h>
 
 #include <machine/bus.h>
 
 #include "nvme.h"
 
 #define DEVICE2SOFTC(dev) ((struct nvme_controller *) device_get_softc(dev))
 
 MALLOC_DECLARE(M_NVME);
 
 #define IDT32_PCI_ID		0x80d0111d /* 32 channel board */
 #define IDT8_PCI_ID		0x80d2111d /* 8 channel board */
 
 /*
  * For commands requiring more than 2 PRP entries, one PRP will be
  *  embedded in the command (prp1), and the rest of the PRP entries
  *  will be in a list pointed to by the command (prp2).  This means
  *  that real max number of PRP entries we support is 32+1, which
  *  results in a max xfer size of 32*PAGE_SIZE.
  */
 #define NVME_MAX_PRP_LIST_ENTRIES	(NVME_MAX_XFER_SIZE / PAGE_SIZE)
 
 #define NVME_ADMIN_TRACKERS	(16)
 #define NVME_ADMIN_ENTRIES	(128)
 /* min and max are defined in admin queue attributes section of spec */
 #define NVME_MIN_ADMIN_ENTRIES	(2)
 #define NVME_MAX_ADMIN_ENTRIES	(4096)
 
 /*
  * NVME_IO_ENTRIES defines the size of an I/O qpair's submission and completion
  *  queues, while NVME_IO_TRACKERS defines the maximum number of I/O that we
  *  will allow outstanding on an I/O qpair at any time.  The only advantage in
  *  having IO_ENTRIES > IO_TRACKERS is for debugging purposes - when dumping
  *  the contents of the submission and completion queues, it will show a longer
  *  history of data.
  */
 #define NVME_IO_ENTRIES		(256)
 #define NVME_IO_TRACKERS	(128)
 #define NVME_MIN_IO_TRACKERS	(4)
 #define NVME_MAX_IO_TRACKERS	(1024)
 
 /*
  * NVME_MAX_IO_ENTRIES is not defined, since it is specified in CC.MQES
  *  for each controller.
  */
 
 #define NVME_INT_COAL_TIME	(0)	/* disabled */
 #define NVME_INT_COAL_THRESHOLD (0)	/* 0-based */
 
 #define NVME_MAX_NAMESPACES	(16)
 #define NVME_MAX_CONSUMERS	(2)
 #define NVME_MAX_ASYNC_EVENTS	(8)
 
 #define NVME_DEFAULT_TIMEOUT_PERIOD	(30)    /* in seconds */
 #define NVME_MIN_TIMEOUT_PERIOD		(5)
 #define NVME_MAX_TIMEOUT_PERIOD		(120)
 
 #define NVME_DEFAULT_RETRY_COUNT	(4)
 
 /* Maximum log page size to fetch for AERs. */
 #define NVME_MAX_AER_LOG_SIZE		(4096)
 
 /*
  * Define CACHE_LINE_SIZE here for older FreeBSD versions that do not define
  *  it.
  */
 #ifndef CACHE_LINE_SIZE
 #define CACHE_LINE_SIZE		(64)
 #endif
 
 /*
  * Use presence of the BIO_UNMAPPED flag to determine whether unmapped I/O
  *  support and the bus_dmamap_load_bio API are available on the target
  *  kernel.  This will ease porting back to earlier stable branches at a
  *  later point.
  */
 #ifdef BIO_UNMAPPED
 #define NVME_UNMAPPED_BIO_SUPPORT
 #endif
 
 extern uma_zone_t	nvme_request_zone;
 extern int32_t		nvme_retry_count;
 
 struct nvme_completion_poll_status {
 
 	struct nvme_completion	cpl;
 	boolean_t		done;
 };
 
 #define NVME_REQUEST_VADDR	1
 #define NVME_REQUEST_NULL	2 /* For requests with no payload. */
 #define NVME_REQUEST_UIO	3
 #ifdef NVME_UNMAPPED_BIO_SUPPORT
 #define NVME_REQUEST_BIO	4
 #endif
 
 struct nvme_request {
 
 	struct nvme_command		cmd;
 	struct nvme_qpair		*qpair;
 	union {
 		void			*payload;
 		struct bio		*bio;
 	} u;
 	uint32_t			type;
 	uint32_t			payload_size;
 	boolean_t			timeout;
 	nvme_cb_fn_t			cb_fn;
 	void				*cb_arg;
 	int32_t				retries;
 	STAILQ_ENTRY(nvme_request)	stailq;
 };
 
 struct nvme_async_event_request {
 
 	struct nvme_controller		*ctrlr;
 	struct nvme_request		*req;
 	struct nvme_completion		cpl;
 	uint32_t			log_page_id;
 	uint32_t			log_page_size;
 	uint8_t				log_page_buffer[NVME_MAX_AER_LOG_SIZE];
 };
 
 struct nvme_tracker {
 
 	TAILQ_ENTRY(nvme_tracker)	tailq;
 	struct nvme_request		*req;
 	struct nvme_qpair		*qpair;
 	struct callout			timer;
 	bus_dmamap_t			payload_dma_map;
 	uint16_t			cid;
 
 	uint64_t			prp[NVME_MAX_PRP_LIST_ENTRIES];
 	bus_addr_t			prp_bus_addr;
 	bus_dmamap_t			prp_dma_map;
 };
 
 struct nvme_qpair {
 
 	struct nvme_controller	*ctrlr;
 	uint32_t		id;
 	uint32_t		phase;
 
 	uint16_t		vector;
 	int			rid;
 	struct resource		*res;
 	void 			*tag;
 
 	uint32_t		num_entries;
 	uint32_t		num_trackers;
 	uint32_t		sq_tdbl_off;
 	uint32_t		cq_hdbl_off;
 
 	uint32_t		sq_head;
 	uint32_t		sq_tail;
 	uint32_t		cq_head;
 
 	int64_t			num_cmds;
 	int64_t			num_intr_handler_calls;
 
 	struct nvme_command	*cmd;
 	struct nvme_completion	*cpl;
 
 	bus_dma_tag_t		dma_tag;
 	bus_dma_tag_t		dma_tag_payload;
 
 	bus_dmamap_t		cmd_dma_map;
 	uint64_t		cmd_bus_addr;
 
 	bus_dmamap_t		cpl_dma_map;
 	uint64_t		cpl_bus_addr;
 
 	TAILQ_HEAD(, nvme_tracker)	free_tr;
 	TAILQ_HEAD(, nvme_tracker)	outstanding_tr;
 	STAILQ_HEAD(, nvme_request)	queued_req;
 
 	struct nvme_tracker	**act_tr;
 
 	boolean_t		is_enabled;
 
 	struct mtx		lock __aligned(CACHE_LINE_SIZE);
 
 } __aligned(CACHE_LINE_SIZE);
 
 struct nvme_namespace {
 
 	struct nvme_controller		*ctrlr;
 	struct nvme_namespace_data	data;
 	uint16_t			id;
 	uint16_t			flags;
 	struct cdev			*cdev;
 	void				*cons_cookie[NVME_MAX_CONSUMERS];
 	uint32_t			stripesize;
 	struct mtx			lock;
 };
 
 /*
  * One of these per allocated PCI device.
  */
 struct nvme_controller {
 
 	device_t		dev;
 
 	struct mtx		lock;
 
 	uint32_t		ready_timeout_in_ms;
 
 	bus_space_tag_t		bus_tag;
 	bus_space_handle_t	bus_handle;
 	int			resource_id;
 	struct resource		*resource;
 
 	/*
 	 * The NVMe spec allows for the MSI-X table to be placed in BAR 4/5,
 	 *  separate from the control registers which are in BAR 0/1.  These
 	 *  members track the mapping of BAR 4/5 for that reason.
 	 */
 	int			bar4_resource_id;
 	struct resource		*bar4_resource;
 
 	uint32_t		msix_enabled;
 	uint32_t		force_intx;
 	uint32_t		enable_aborts;
 
 	uint32_t		num_io_queues;
-	boolean_t		per_cpu_io_queues;
+	uint32_t		num_cpus_per_ioq;
 
 	/* Fields for tracking progress during controller initialization. */
 	struct intr_config_hook	config_hook;
 	uint32_t		ns_identified;
 	uint32_t		queues_created;
 
 	struct task		reset_task;
 	struct task		fail_req_task;
 	struct taskqueue	*taskqueue;
-
-	struct resource		*msi_res[MAXCPU + 1];
 
 	/* For shared legacy interrupt. */
 	int			rid;
 	struct resource		*res;
 	void			*tag;
 
 	bus_dma_tag_t		hw_desc_tag;
 	bus_dmamap_t		hw_desc_map;
 
 	/** maximum i/o size in bytes */
 	uint32_t		max_xfer_size;
 
 	/** minimum page size supported by this controller in bytes */
 	uint32_t		min_page_size;
 
 	/** interrupt coalescing time period (in microseconds) */
 	uint32_t		int_coal_time;
 
 	/** interrupt coalescing threshold */
 	uint32_t		int_coal_threshold;
 
 	/** timeout period in seconds */
 	uint32_t		timeout_period;
 
 	struct nvme_qpair	adminq;
 	struct nvme_qpair	*ioq;
 
 	struct nvme_registers		*regs;
 
 	struct nvme_controller_data	cdata;
 	struct nvme_namespace		ns[NVME_MAX_NAMESPACES];
 
 	struct cdev			*cdev;
 
 	/** bit mask of warning types currently enabled for async events */
 	union nvme_critical_warning_state	async_event_config;
 
 	uint32_t			num_aers;
 	struct nvme_async_event_request	aer[NVME_MAX_ASYNC_EVENTS];
 
 	void				*cons_cookie[NVME_MAX_CONSUMERS];
 
 	uint32_t			is_resetting;
 	uint32_t			is_initialized;
 	uint32_t			notification_sent;
 
 	boolean_t			is_failed;
 	STAILQ_HEAD(, nvme_request)	fail_req;
 };
 
 #define nvme_mmio_offsetof(reg)						       \
 	offsetof(struct nvme_registers, reg)
 
 #define nvme_mmio_read_4(sc, reg)					       \
 	bus_space_read_4((sc)->bus_tag, (sc)->bus_handle,		       \
 	    nvme_mmio_offsetof(reg))
 
 #define nvme_mmio_write_4(sc, reg, val)					       \
 	bus_space_write_4((sc)->bus_tag, (sc)->bus_handle,		       \
 	    nvme_mmio_offsetof(reg), val)
 
 #define nvme_mmio_write_8(sc, reg, val) \
 	do {								       \
 		bus_space_write_4((sc)->bus_tag, (sc)->bus_handle,	       \
 		    nvme_mmio_offsetof(reg), val & 0xFFFFFFFF); 	       \
 		bus_space_write_4((sc)->bus_tag, (sc)->bus_handle,	       \
 		    nvme_mmio_offsetof(reg)+4,				       \
 		    (val & 0xFFFFFFFF00000000UL) >> 32);		       \
 	} while (0);
 
 #if __FreeBSD_version < 800054
 #define wmb()	__asm volatile("sfence" ::: "memory")
 #define mb()	__asm volatile("mfence" ::: "memory")
 #endif
 
 #define nvme_printf(ctrlr, fmt, args...)	\
     device_printf(ctrlr->dev, fmt, ##args)
 
 void	nvme_ns_test(struct nvme_namespace *ns, u_long cmd, caddr_t arg);
 
 void	nvme_ctrlr_cmd_identify_controller(struct nvme_controller *ctrlr,
 					   void *payload,
 					   nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr,
 					  uint16_t nsid, void *payload,
 					  nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_set_interrupt_coalescing(struct nvme_controller *ctrlr,
 						uint32_t microseconds,
 						uint32_t threshold,
 						nvme_cb_fn_t cb_fn,
 						void *cb_arg);
 void	nvme_ctrlr_cmd_get_error_page(struct nvme_controller *ctrlr,
 				      struct nvme_error_information_entry *payload,
 				      uint32_t num_entries, /* 0 = max */
 				      nvme_cb_fn_t cb_fn,
 				      void *cb_arg);
 void	nvme_ctrlr_cmd_get_health_information_page(struct nvme_controller *ctrlr,
 						   uint32_t nsid,
 						   struct nvme_health_information_page *payload,
 						   nvme_cb_fn_t cb_fn,
 						   void *cb_arg);
 void	nvme_ctrlr_cmd_get_firmware_page(struct nvme_controller *ctrlr,
 					 struct nvme_firmware_page *payload,
 					 nvme_cb_fn_t cb_fn,
 					 void *cb_arg);
 void	nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr,
 				    struct nvme_qpair *io_que, uint16_t vector,
 				    nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr,
 				    struct nvme_qpair *io_que,
 				    nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_delete_io_cq(struct nvme_controller *ctrlr,
 				    struct nvme_qpair *io_que,
 				    nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_delete_io_sq(struct nvme_controller *ctrlr,
 				    struct nvme_qpair *io_que,
 				    nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_set_num_queues(struct nvme_controller *ctrlr,
 				      uint32_t num_queues, nvme_cb_fn_t cb_fn,
 				      void *cb_arg);
 void	nvme_ctrlr_cmd_set_async_event_config(struct nvme_controller *ctrlr,
 					      union nvme_critical_warning_state state,
 					      nvme_cb_fn_t cb_fn, void *cb_arg);
 void	nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid,
 			     uint16_t sqid, nvme_cb_fn_t cb_fn, void *cb_arg);
 
 void	nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl);
 
 int	nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev);
 void	nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev);
 void	nvme_ctrlr_shutdown(struct nvme_controller *ctrlr);
 int	nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr);
 void	nvme_ctrlr_reset(struct nvme_controller *ctrlr);
 /* ctrlr defined as void * to allow use with config_intrhook. */
 void	nvme_ctrlr_start_config_hook(void *ctrlr_arg);
 void	nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
 					struct nvme_request *req);
 void	nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
 				     struct nvme_request *req);
 void	nvme_ctrlr_post_failed_request(struct nvme_controller *ctrlr,
 				       struct nvme_request *req);
 
 void	nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 			     uint16_t vector, uint32_t num_entries,
 			     uint32_t num_trackers,
 			     struct nvme_controller *ctrlr);
 void	nvme_qpair_submit_tracker(struct nvme_qpair *qpair,
 				  struct nvme_tracker *tr);
 void	nvme_qpair_process_completions(struct nvme_qpair *qpair);
 void	nvme_qpair_submit_request(struct nvme_qpair *qpair,
 				  struct nvme_request *req);
 void	nvme_qpair_reset(struct nvme_qpair *qpair);
 void	nvme_qpair_fail(struct nvme_qpair *qpair);
 void	nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
 					   struct nvme_request *req,
 					   uint32_t sct, uint32_t sc,
 					   boolean_t print_on_error);
 
 void	nvme_admin_qpair_enable(struct nvme_qpair *qpair);
 void	nvme_admin_qpair_disable(struct nvme_qpair *qpair);
 void	nvme_admin_qpair_destroy(struct nvme_qpair *qpair);
 
 void	nvme_io_qpair_enable(struct nvme_qpair *qpair);
 void	nvme_io_qpair_disable(struct nvme_qpair *qpair);
 void	nvme_io_qpair_destroy(struct nvme_qpair *qpair);
 
 int	nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
 			  struct nvme_controller *ctrlr);
 void	nvme_ns_destruct(struct nvme_namespace *ns);
 
 void	nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr);
 
 void	nvme_dump_command(struct nvme_command *cmd);
 void	nvme_dump_completion(struct nvme_completion *cpl);
 
 static __inline void
 nvme_single_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 {
 	uint64_t *bus_addr = (uint64_t *)arg;
 
 	if (error != 0)
 		printf("nvme_single_map err %d\n", error);
 	*bus_addr = seg[0].ds_addr;
 }
 
 static __inline struct nvme_request *
 _nvme_allocate_request(nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *req;
 
 	req = uma_zalloc(nvme_request_zone, M_NOWAIT | M_ZERO);
 	if (req != NULL) {
 		req->cb_fn = cb_fn;
 		req->cb_arg = cb_arg;
 		req->timeout = TRUE;
 	}
 	return (req);
 }
 
 static __inline struct nvme_request *
 nvme_allocate_request_vaddr(void *payload, uint32_t payload_size,
     nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *req;
 
 	req = _nvme_allocate_request(cb_fn, cb_arg);
 	if (req != NULL) {
 		req->type = NVME_REQUEST_VADDR;
 		req->u.payload = payload;
 		req->payload_size = payload_size;
 	}
 	return (req);
 }
 
 static __inline struct nvme_request *
 nvme_allocate_request_null(nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *req;
 
 	req = _nvme_allocate_request(cb_fn, cb_arg);
 	if (req != NULL)
 		req->type = NVME_REQUEST_NULL;
 	return (req);
 }
 
 static __inline struct nvme_request *
 nvme_allocate_request_bio(struct bio *bio, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *req;
 
 	req = _nvme_allocate_request(cb_fn, cb_arg);
 	if (req != NULL) {
 #ifdef NVME_UNMAPPED_BIO_SUPPORT
 		req->type = NVME_REQUEST_BIO;
 		req->u.bio = bio;
 #else
 		req->type = NVME_REQUEST_VADDR;
 		req->u.payload = bio->bio_data;
 		req->payload_size = bio->bio_bcount;
 #endif
 	}
 	return (req);
 }
 
 #define nvme_free_request(req)	uma_zfree(nvme_request_zone, req)
 
 void	nvme_notify_async_consumers(struct nvme_controller *ctrlr,
 				    const struct nvme_completion *async_cpl,
 				    uint32_t log_page_id, void *log_page_buffer,
 				    uint32_t log_page_size);
 void	nvme_notify_fail_consumers(struct nvme_controller *ctrlr);
 void	nvme_notify_new_controller(struct nvme_controller *ctrlr);
 
 #endif /* __NVME_PRIVATE_H__ */
Index: projects/release-pkg/sys/dev/nvme/nvme_qpair.c
===================================================================
--- projects/release-pkg/sys/dev/nvme/nvme_qpair.c	(revision 293335)
+++ projects/release-pkg/sys/dev/nvme/nvme_qpair.c	(revision 293336)
@@ -1,1002 +1,1003 @@
 /*-
  * Copyright (C) 2012-2014 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/bus.h>
 
 #include <dev/pci/pcivar.h>
 
 #include "nvme_private.h"
 
 /*
  * Forward declaration: nvme_qpair_complete_tracker() calls this before its
  *  definition appears later in the file.
  */
 static void	_nvme_qpair_submit_request(struct nvme_qpair *qpair,
 					   struct nvme_request *req);
 
 /* One row of an opcode-to-name lookup table. */
 struct nvme_opcode_string {
 
 	uint16_t	opc;	/* opcode value; 0xFFFF marks the table end */
 	const char *	str;	/* printable name for the opcode */
 };
 
 /*
  * Names for admin opcodes, scanned linearly by get_admin_opcode_string().
  *  The final 0xFFFF row is the terminator and also supplies the generic
  *  name returned for opcodes that are not listed.
  */
 static struct nvme_opcode_string admin_opcode[] = {
 	{ NVME_OPC_DELETE_IO_SQ, "DELETE IO SQ" },
 	{ NVME_OPC_CREATE_IO_SQ, "CREATE IO SQ" },
 	{ NVME_OPC_GET_LOG_PAGE, "GET LOG PAGE" },
 	{ NVME_OPC_DELETE_IO_CQ, "DELETE IO CQ" },
 	{ NVME_OPC_CREATE_IO_CQ, "CREATE IO CQ" },
 	{ NVME_OPC_IDENTIFY, "IDENTIFY" },
 	{ NVME_OPC_ABORT, "ABORT" },
 	{ NVME_OPC_SET_FEATURES, "SET FEATURES" },
 	{ NVME_OPC_GET_FEATURES, "GET FEATURES" },
 	{ NVME_OPC_ASYNC_EVENT_REQUEST, "ASYNC EVENT REQUEST" },
 	{ NVME_OPC_FIRMWARE_ACTIVATE, "FIRMWARE ACTIVATE" },
 	{ NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD, "FIRMWARE IMAGE DOWNLOAD" },
 	{ NVME_OPC_FORMAT_NVM, "FORMAT NVM" },
 	{ NVME_OPC_SECURITY_SEND, "SECURITY SEND" },
 	{ NVME_OPC_SECURITY_RECEIVE, "SECURITY RECEIVE" },
 	{ 0xFFFF, "ADMIN COMMAND" }
 };
 
 /*
  * Names for I/O opcodes, scanned linearly by get_io_opcode_string().
  *  The final 0xFFFF row is the terminator and also supplies the generic
  *  name returned for opcodes that are not listed.
  */
 static struct nvme_opcode_string io_opcode[] = {
 	{ NVME_OPC_FLUSH, "FLUSH" },
 	{ NVME_OPC_WRITE, "WRITE" },
 	{ NVME_OPC_READ, "READ" },
 	{ NVME_OPC_WRITE_UNCORRECTABLE, "WRITE UNCORRECTABLE" },
 	{ NVME_OPC_COMPARE, "COMPARE" },
 	{ NVME_OPC_DATASET_MANAGEMENT, "DATASET MANAGEMENT" },
 	{ 0xFFFF, "IO COMMAND" }
 };
 
 /*
  * Map an admin opcode to its printable name.  Opcodes missing from
  *  admin_opcode[] yield the name stored in the table's 0xFFFF sentinel row.
  */
 static const char *
 get_admin_opcode_string(uint16_t opc)
 {
 	struct nvme_opcode_string *cur;
 
 	/* Stop on the first match or on the sentinel, whichever comes first. */
 	for (cur = admin_opcode; cur->opc != 0xFFFF; cur++) {
 		if (cur->opc == opc)
 			break;
 	}
 	return (cur->str);
 }
 
 /*
  * Map an I/O opcode to its printable name.  Opcodes missing from
  *  io_opcode[] yield the name stored in the table's 0xFFFF sentinel row.
  */
 static const char *
 get_io_opcode_string(uint16_t opc)
 {
 	struct nvme_opcode_string *cur;
 
 	/* Stop on the first match or on the sentinel, whichever comes first. */
 	for (cur = io_opcode; cur->opc != 0xFFFF; cur++) {
 		if (cur->opc == opc)
 			break;
 	}
 	return (cur->str);
 }
 
 
 /*
  * Log one admin command: opcode name and value, queue id, command id,
  *  namespace id and the cdw10/cdw11 words.
  */
 static void
 nvme_admin_qpair_print_command(struct nvme_qpair *qpair,
     struct nvme_command *cmd)
 {
 	const char *opc_name = get_admin_opcode_string(cmd->opc);
 
 	nvme_printf(qpair->ctrlr,
 	    "%s (%02x) sqid:%d cid:%d nsid:%x cdw10:%08x cdw11:%08x\n",
 	    opc_name, cmd->opc, qpair->id, cmd->cid, cmd->nsid,
 	    cmd->cdw10, cmd->cdw11);
 }
 
 /*
  * Log one I/O command.  Read/write style opcodes also print the LBA
  *  (cdw11:cdw10) and the length (low 16 bits of cdw12, plus one); opcodes
  *  without a dedicated case additionally print the raw opcode value.
  */
 static void
 nvme_io_qpair_print_command(struct nvme_qpair *qpair,
     struct nvme_command *cmd)
 {
 	const char *opc_name = get_io_opcode_string(cmd->opc);
 
 	switch (cmd->opc) {
 	case NVME_OPC_WRITE:
 	case NVME_OPC_READ:
 	case NVME_OPC_WRITE_UNCORRECTABLE:
 	case NVME_OPC_COMPARE:
 		nvme_printf(qpair->ctrlr,
 		    "%s sqid:%d cid:%d nsid:%d lba:%llu len:%d\n",
 		    opc_name, qpair->id, cmd->cid, cmd->nsid,
 		    ((unsigned long long)cmd->cdw11 << 32) + cmd->cdw10,
 		    (cmd->cdw12 & 0xFFFF) + 1);
 		return;
 	case NVME_OPC_FLUSH:
 	case NVME_OPC_DATASET_MANAGEMENT:
 		nvme_printf(qpair->ctrlr, "%s sqid:%d cid:%d nsid:%d\n",
 		    opc_name, qpair->id, cmd->cid, cmd->nsid);
 		return;
 	default:
 		nvme_printf(qpair->ctrlr, "%s (%02x) sqid:%d cid:%d nsid:%d\n",
 		    opc_name, cmd->opc, qpair->id, cmd->cid, cmd->nsid);
 		return;
 	}
 }
 
 /*
  * Log a command using the admin printer for queue id 0 and the I/O printer
  *  for every other queue id.
  */
 static void
 nvme_qpair_print_command(struct nvme_qpair *qpair, struct nvme_command *cmd)
 {
 	if (qpair->id != 0) {
 		nvme_io_qpair_print_command(qpair, cmd);
 		return;
 	}
 
 	nvme_admin_qpair_print_command(qpair, cmd);
 }
 
 /* One row of a status-code-to-name lookup table. */
 struct nvme_status_string {
 
 	uint16_t	sc;	/* status code; 0xFFFF marks the table end */
 	const char *	str;	/* printable name for the status code */
 };
 
 /*
  * Names for status codes of type NVME_SCT_GENERIC, used by
  *  get_status_string().  The final 0xFFFF row terminates the table and
  *  supplies the name returned for codes that are not listed.
  */
 static struct nvme_status_string generic_status[] = {
 	{ NVME_SC_SUCCESS, "SUCCESS" },
 	{ NVME_SC_INVALID_OPCODE, "INVALID OPCODE" },
 	{ NVME_SC_INVALID_FIELD, "INVALID_FIELD" },
 	{ NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" },
 	{ NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" },
 	{ NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" },
 	{ NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" },
 	{ NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" },
 	{ NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" },
 	{ NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" },
 	{ NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" },
 	{ NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" },
 	{ NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" },
 	{ NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" },
 	{ NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" },
 	{ NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" },
 	{ 0xFFFF, "GENERIC" }
 };
 
 /*
  * Names for status codes of type NVME_SCT_COMMAND_SPECIFIC, used by
  *  get_status_string().  The final 0xFFFF row terminates the table and
  *  supplies the name returned for codes that are not listed.
  */
 static struct nvme_status_string command_specific_status[] = {
 	{ NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" },
 	{ NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" },
 	{ NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED, "MAX QUEUE SIZE EXCEEDED" },
 	{ NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" },
 	{ NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" },
 	{ NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" },
 	{ NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" },
 	{ NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" },
 	{ NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" },
 	{ NVME_SC_INVALID_FORMAT, "INVALID FORMAT" },
 	{ NVME_SC_FIRMWARE_REQUIRES_RESET, "FIRMWARE REQUIRES RESET" },
 	{ NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" },
 	{ NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" },
 	{ NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE, "WRITE TO RO PAGE" },
 	{ 0xFFFF, "COMMAND SPECIFIC" }
 };
 
 /*
  * Names for status codes of type NVME_SCT_MEDIA_ERROR, used by
  *  get_status_string().  The final 0xFFFF row terminates the table and
  *  supplies the name returned for codes that are not listed.
  */
 static struct nvme_status_string media_error_status[] = {
 	{ NVME_SC_WRITE_FAULTS, "WRITE FAULTS" },
 	{ NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" },
 	{ NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" },
 	{ NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" },
 	{ NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" },
 	{ NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" },
 	{ NVME_SC_ACCESS_DENIED, "ACCESS DENIED" },
 	{ 0xFFFF, "MEDIA ERROR" }
 };
 
 /*
  * Translate a (status code type, status code) pair into a printable name.
  *  Vendor-specific and unrecognized types get a fixed string; for the other
  *  types the matching table is searched and its sentinel row provides the
  *  fallback name.
  */
 static const char *
 get_status_string(uint16_t sct, uint16_t sc)
 {
 	struct nvme_status_string *cur;
 
 	/* Pick the table for this status code type. */
 	if (sct == NVME_SCT_GENERIC)
 		cur = generic_status;
 	else if (sct == NVME_SCT_COMMAND_SPECIFIC)
 		cur = command_specific_status;
 	else if (sct == NVME_SCT_MEDIA_ERROR)
 		cur = media_error_status;
 	else if (sct == NVME_SCT_VENDOR_SPECIFIC)
 		return ("VENDOR SPECIFIC");
 	else
 		return ("RESERVED");
 
 	/* Stop on the first match or on the 0xFFFF sentinel. */
 	for (; cur->sc != 0xFFFF; cur++) {
 		if (cur->sc == sc)
 			break;
 	}
 	return (cur->str);
 }
 
 /*
  * Log one completion entry: status name, raw sct/sc values, submission
  *  queue id, command id and cdw0.
  */
 static void
 nvme_qpair_print_completion(struct nvme_qpair *qpair, 
     struct nvme_completion *cpl)
 {
 	const char *status_name;
 
 	status_name = get_status_string(cpl->status.sct, cpl->status.sc);
 	nvme_printf(qpair->ctrlr, "%s (%02x/%02x) sqid:%d cid:%d cdw0:%x\n",
 	    status_name, cpl->status.sct, cpl->status.sc, cpl->sqid,
 	    cpl->cid, cpl->cdw0);
 }
 
 /*
  * Decide whether a failed command may be retried.
  *
  * Only generic-type completions with status ABORTED_BY_REQUEST or
  *  NAMESPACE_NOT_READY are candidates, and then only when the DNR bit is
  *  clear.  Every other status, and every other status code type, is
  *  reported as not retryable.
  *
  * TODO: spec is not clear how commands that are aborted due to TLER will
  *  be marked, so no other status consults the DNR bit for now.
  */
 static boolean_t
 nvme_completion_is_retry(const struct nvme_completion *cpl)
 {
 	if (cpl->status.sct != NVME_SCT_GENERIC)
 		return (0);
 
 	if (cpl->status.sc != NVME_SC_ABORTED_BY_REQUEST &&
 	    cpl->status.sc != NVME_SC_NAMESPACE_NOT_READY)
 		return (0);
 
 	/* DNR set means the controller asked us not to retry. */
 	return (cpl->status.dnr ? 0 : 1);
 }
 
 /*
  * Initialize a freshly allocated tracker: create its payload and PRP-list
  *  DMA maps, load the PRP list so that its bus address is recorded in
  *  tr->prp_bus_addr, set up the timeout callout and record the command id
  *  and owning qpair.
  *
  * This function cannot return an error, so busdma failures are logged
  *  (in the same style as nvme_qpair_construct()) instead of being dropped
  *  silently.
  */
 static void
 nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
     uint16_t cid)
 {
 	int	err;
 
 	err = bus_dmamap_create(qpair->dma_tag_payload, 0,
 	    &tr->payload_dma_map);
 	if (err != 0)
 		nvme_printf(qpair->ctrlr,
 		    "payload_dma_map create failed %d\n", err);
 
 	err = bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);
 	if (err != 0)
 		nvme_printf(qpair->ctrlr,
 		    "prp_dma_map create failed %d\n", err);
 
 	err = bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
 	    sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);
 	if (err != 0)
 		nvme_printf(qpair->ctrlr,
 		    "prp_dma_map load failed %d\n", err);
 
 	callout_init(&tr->timer, 1);
 	tr->cid = cid;
 	tr->qpair = qpair;
 }
 
 /*
  * Finish a tracker for which a completion entry (from the controller or
  *  manufactured by the driver) is available.
  *
  * A failed command is resubmitted on the same tracker when
  *  nvme_completion_is_retry() allows it and the request has used fewer than
  *  nvme_retry_count retries.  Otherwise the request's callback (if any) is
  *  invoked, the request is freed, the tracker is moved from outstanding_tr
  *  to free_tr, and at most one queued request is started.
  *
  * The callback runs before qpair->lock is taken, and this function takes
  *  the lock itself, so it must be called without the lock held.
  */
 static void
 nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
     struct nvme_completion *cpl, boolean_t print_on_error)
 {
 	struct nvme_request	*req;
 	boolean_t		retry, error;
 
 	req = tr->req;
 	error = nvme_completion_is_error(cpl);
 	retry = error && nvme_completion_is_retry(cpl) &&
 	   req->retries < nvme_retry_count;
 
 	if (error && print_on_error) {
 		nvme_qpair_print_command(qpair, &req->cmd);
 		nvme_qpair_print_completion(qpair, cpl);
 	}
 
 	/* The command id no longer refers to an active tracker. */
 	qpair->act_tr[cpl->cid] = NULL;
 
 	KASSERT(cpl->cid == req->cmd.cid, ("cpl cid does not match cmd cid\n"));
 
 	/* Report the result now unless the command is about to be retried. */
 	if (req->cb_fn && !retry)
 		req->cb_fn(req->cb_arg, cpl);
 
 	mtx_lock(&qpair->lock);
 	callout_stop(&tr->timer);
 
 	if (retry) {
 		req->retries++;
 		nvme_qpair_submit_tracker(qpair, tr);
 	} else {
 		/* NULL-type requests never had a payload map loaded. */
 		if (req->type != NVME_REQUEST_NULL)
 			bus_dmamap_unload(qpair->dma_tag_payload,
 			    tr->payload_dma_map);
 
 		nvme_free_request(req);
 		tr->req = NULL;
 
 		TAILQ_REMOVE(&qpair->outstanding_tr, tr, tailq);
 		TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
 
 		/*
 		 * If the controller is in the middle of resetting, don't
 		 *  try to submit queued requests here - let the reset logic
 		 *  handle that instead.
 		 */
 		if (!STAILQ_EMPTY(&qpair->queued_req) &&
 		    !qpair->ctrlr->is_resetting) {
 			req = STAILQ_FIRST(&qpair->queued_req);
 			STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
 			_nvme_qpair_submit_request(qpair, req);
 		}
 	}
 
 	mtx_unlock(&qpair->lock);
 }
 
 /*
  * Complete a tracker with a driver-built completion entry carrying the
  *  given status code type, status code and DNR bit, as if the controller
  *  had posted it.
  */
 static void
 nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
     struct nvme_tracker *tr, uint32_t sct, uint32_t sc, uint32_t dnr,
     boolean_t print_on_error)
 {
 	struct nvme_completion	fake_cpl;
 
 	/* Start from an all-zero entry, then fill in the status fields. */
 	memset(&fake_cpl, 0, sizeof(fake_cpl));
 	fake_cpl.status.sct = sct;
 	fake_cpl.status.sc = sc;
 	fake_cpl.status.dnr = dnr;
 
 	/* Identify the queue and command the entry belongs to. */
 	fake_cpl.sqid = qpair->id;
 	fake_cpl.cid = tr->cid;
 
 	nvme_qpair_complete_tracker(qpair, tr, &fake_cpl, print_on_error);
 }
 
 /*
  * Complete a request that is not attached to a tracker: build a completion
  *  entry with the given status, optionally log it, invoke the request's
  *  callback (if any) and free the request.
  */
 void
 nvme_qpair_manual_complete_request(struct nvme_qpair *qpair,
     struct nvme_request *req, uint32_t sct, uint32_t sc,
     boolean_t print_on_error)
 {
 	struct nvme_completion	fake_cpl;
 
 	memset(&fake_cpl, 0, sizeof(fake_cpl));
 	fake_cpl.status.sct = sct;
 	fake_cpl.status.sc = sc;
 	fake_cpl.sqid = qpair->id;
 
 	if (nvme_completion_is_error(&fake_cpl) && print_on_error) {
 		nvme_qpair_print_command(qpair, &req->cmd);
 		nvme_qpair_print_completion(qpair, &fake_cpl);
 	}
 
 	if (req->cb_fn)
 		req->cb_fn(req->cb_arg, &fake_cpl);
 
 	nvme_free_request(req);
 }
 
 /*
  * Drain the completion queue: handle every entry whose phase bit matches
  *  the phase currently expected, completing the tracker that the entry's
  *  command id maps to, and write the new head to the completion queue
  *  doorbell after each entry.
  */
 void
 nvme_qpair_process_completions(struct nvme_qpair *qpair)
 {
 	struct nvme_tracker	*tr;
 	struct nvme_completion	*cpl;
 
 	qpair->num_intr_handler_calls++;
 
 	if (!qpair->is_enabled)
 		/*
 		 * qpair is not enabled, likely because a controller reset is
 		 *  in progress.  Ignore the interrupt - any I/O that was
 		 *  associated with this interrupt will get retried when the
 		 *  reset is complete.
 		 */
 		return;
 
 	while (1) {
 		cpl = &qpair->cpl[qpair->cq_head];
 
 		/* A phase mismatch means this slot holds no new entry. */
 		if (cpl->status.p != qpair->phase)
 			break;
 
 		tr = qpair->act_tr[cpl->cid];
 
 		if (tr != NULL) {
 			nvme_qpair_complete_tracker(qpair, tr, cpl, TRUE);
 			qpair->sq_head = cpl->sqhd;
 		} else {
 			nvme_printf(qpair->ctrlr, 
 			    "cpl does not map to outstanding cmd\n");
 			nvme_dump_completion(cpl);
 			KASSERT(0, ("received completion for unknown cmd\n"));
 		}
 
 		/* Advance the head; flip the expected phase on wrap-around. */
 		if (++qpair->cq_head == qpair->num_entries) {
 			qpair->cq_head = 0;
 			qpair->phase = !qpair->phase;
 		}
 
 		nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].cq_hdbl,
 		    qpair->cq_head);
 	}
 }
 
 /*
  * Interrupt handler registered with bus_setup_intr(); arg is the qpair
  *  whose completions should be processed.
  */
 static void
 nvme_qpair_msix_handler(void *arg)
 {
 
 	nvme_qpair_process_completions((struct nvme_qpair *)arg);
 }
 
 /*
  * Set up a queue pair: record its identity, hook up its MSI-X interrupt
  *  when MSI-X is enabled, create the DMA tags, allocate and DMA-load the
  *  submission and completion rings, and allocate num_trackers trackers plus
  *  the act_tr[] lookup array (one slot per queue entry).
  *
  * Failures of the busdma tag/map creation calls are only logged; the
  *  function returns void and carries on.
  */
 void
 nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
     uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
     struct nvme_controller *ctrlr)
 {
 	struct nvme_tracker	*tr;
 	uint32_t		i;
 	int			err;
 
 	qpair->id = id;
 	qpair->vector = vector;
 	qpair->num_entries = num_entries;
 	qpair->num_trackers = num_trackers;
 	qpair->ctrlr = ctrlr;
 
 	if (ctrlr->msix_enabled) {
 
 		/*
 		 * MSI-X vector resource IDs start at 1, so we add one to
 		 *  the queue's vector to get the corresponding rid to use.
 		 */
 		qpair->rid = vector + 1;
-		qpair->res = ctrlr->msi_res[vector];
 
+		qpair->res = bus_alloc_resource_any(ctrlr->dev, SYS_RES_IRQ,
+		    &qpair->rid, RF_ACTIVE);
 		/*
 		 * NOTE(review): neither the resource allocation above nor
 		 *  bus_setup_intr() below has its result checked here.
 		 */
 		bus_setup_intr(ctrlr->dev, qpair->res,
 		    INTR_TYPE_MISC | INTR_MPSAFE, NULL,
 		    nvme_qpair_msix_handler, qpair, &qpair->tag);
 	}
 
 	mtx_init(&qpair->lock, "nvme qpair lock", NULL, MTX_DEF);
 
 	/* Note: NVMe PRP format is restricted to 4-byte alignment. */
 	err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
 	    4, PAGE_SIZE, BUS_SPACE_MAXADDR,
 	    BUS_SPACE_MAXADDR, NULL, NULL, NVME_MAX_XFER_SIZE,
 	    (NVME_MAX_XFER_SIZE/PAGE_SIZE)+1, PAGE_SIZE, 0,
 	    NULL, NULL, &qpair->dma_tag_payload);
 	if (err != 0)
 		nvme_printf(ctrlr, "payload tag create failed %d\n", err);
 
 	/* Second tag: single-segment mappings (rings and PRP lists). */
 	err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
 	    4, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
 	    BUS_SPACE_MAXSIZE, 1, BUS_SPACE_MAXSIZE, 0,
 	    NULL, NULL, &qpair->dma_tag);
 	if (err != 0)
 		nvme_printf(ctrlr, "tag create failed %d\n", err);
 
 	qpair->num_cmds = 0;
 	qpair->num_intr_handler_calls = 0;
 
 	/* NOTE(review): the contigmalloc() results are not checked here. */
 	qpair->cmd = contigmalloc(qpair->num_entries *
 	    sizeof(struct nvme_command), M_NVME, M_ZERO,
 	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
 	qpair->cpl = contigmalloc(qpair->num_entries *
 	    sizeof(struct nvme_completion), M_NVME, M_ZERO,
 	    0, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
 
 	err = bus_dmamap_create(qpair->dma_tag, 0, &qpair->cmd_dma_map);
 	if (err != 0)
 		nvme_printf(ctrlr, "cmd_dma_map create failed %d\n", err);
 
 	err = bus_dmamap_create(qpair->dma_tag, 0, &qpair->cpl_dma_map);
 	if (err != 0)
 		nvme_printf(ctrlr, "cpl_dma_map create failed %d\n", err);
 
 	/* Load both rings; nvme_single_map stores the bus address. */
 	bus_dmamap_load(qpair->dma_tag, qpair->cmd_dma_map,
 	    qpair->cmd, qpair->num_entries * sizeof(struct nvme_command),
 	    nvme_single_map, &qpair->cmd_bus_addr, 0);
 	bus_dmamap_load(qpair->dma_tag, qpair->cpl_dma_map,
 	    qpair->cpl, qpair->num_entries * sizeof(struct nvme_completion),
 	    nvme_single_map, &qpair->cpl_bus_addr, 0);
 
 	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell[id].sq_tdbl);
 	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell[id].cq_hdbl);
 
 	TAILQ_INIT(&qpair->free_tr);
 	TAILQ_INIT(&qpair->outstanding_tr);
 	STAILQ_INIT(&qpair->queued_req);
 
 	/* Tracker i is given command id i. */
 	for (i = 0; i < qpair->num_trackers; i++) {
 		tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_WAITOK);
 		nvme_qpair_construct_tracker(qpair, tr, i);
 		TAILQ_INSERT_HEAD(&qpair->free_tr, tr, tailq);
 	}
 
 	qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
 	    M_NVME, M_ZERO | M_WAITOK);
 }
 
 static void
 nvme_qpair_destroy(struct nvme_qpair *qpair)
 {
 	struct nvme_tracker	*tr;
 
 	if (qpair->tag)
 		bus_teardown_intr(qpair->ctrlr->dev, qpair->res, qpair->tag);
 
 	if (qpair->res)
 		bus_release_resource(qpair->ctrlr->dev, SYS_RES_IRQ,
 		    rman_get_rid(qpair->res), qpair->res);
 
 	if (qpair->cmd) {
 		bus_dmamap_unload(qpair->dma_tag, qpair->cmd_dma_map);
 		bus_dmamap_destroy(qpair->dma_tag, qpair->cmd_dma_map);
 		contigfree(qpair->cmd,
 		    qpair->num_entries * sizeof(struct nvme_command), M_NVME);
 	}
 
 	if (qpair->cpl) {
 		bus_dmamap_unload(qpair->dma_tag, qpair->cpl_dma_map);
 		bus_dmamap_destroy(qpair->dma_tag, qpair->cpl_dma_map);
 		contigfree(qpair->cpl,
 		    qpair->num_entries * sizeof(struct nvme_completion),
 		    M_NVME);
 	}
 
 	if (qpair->dma_tag)
 		bus_dma_tag_destroy(qpair->dma_tag);
 
 	if (qpair->dma_tag_payload)
 		bus_dma_tag_destroy(qpair->dma_tag_payload);
 
 	if (qpair->act_tr)
 		free(qpair->act_tr, M_NVME);
 
 	while (!TAILQ_EMPTY(&qpair->free_tr)) {
 		tr = TAILQ_FIRST(&qpair->free_tr);
 		TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
 		bus_dmamap_destroy(qpair->dma_tag, tr->payload_dma_map);
 		bus_dmamap_destroy(qpair->dma_tag, tr->prp_dma_map);
 		free(tr, M_NVME);
 	}
 }
 
 /*
  * Manually complete every outstanding ASYNC EVENT REQUEST on the admin
  *  queue with status ABORTED - SQ DELETION, without logging.
  *
  * Completing a tracker changes outstanding_tr, so the scan restarts from
  *  the head of the list after each completion.
  */
 static void
 nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair)
 {
 	struct nvme_tracker	*cur;
 
 	for (cur = TAILQ_FIRST(&qpair->outstanding_tr); cur != NULL; ) {
 		if (cur->req->cmd.opc != NVME_OPC_ASYNC_EVENT_REQUEST) {
 			cur = TAILQ_NEXT(cur, tailq);
 			continue;
 		}
 
 		nvme_qpair_manual_complete_tracker(qpair, cur,
 		    NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION, 0,
 		    FALSE);
 		cur = TAILQ_FIRST(&qpair->outstanding_tr);
 	}
 }
 
 /*
  * Tear down the admin queue pair: abort outstanding async event requests
  *  first, then release the qpair's resources.
  */
 void
 nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
 {
 
 	nvme_admin_qpair_abort_aers(qpair);
 	nvme_qpair_destroy(qpair);
 }
 
 /* Tear down an I/O queue pair; no I/O-specific work beyond the common path. */
 void
 nvme_io_qpair_destroy(struct nvme_qpair *qpair)
 {
 
 	nvme_qpair_destroy(qpair);
 }
 
 /*
  * Completion callback for the ABORT command issued by nvme_timeout();
  *  arg is the tracker of the command that timed out.
  *
  * cdw0 == 1 means the controller was not able to abort the command.  The
  *  active tracker array is still consulted, to cover the race where the
  *  I/O timed out at the same time the controller was completing it.  If
  *  the command is still active, it is completed manually with a fake
  *  ABORTED - BY REQUEST status.
  */
 static void
 nvme_abort_complete(void *arg, const struct nvme_completion *status)
 {
 	struct nvme_tracker	*tr = arg;
 
 	/* The abort was honoured; nothing left to do here. */
 	if (status->cdw0 != 1)
 		return;
 
 	/* The command completed on its own in the meantime. */
 	if (tr->qpair->act_tr[tr->cid] == NULL)
 		return;
 
 	nvme_printf(tr->qpair->ctrlr,
 	    "abort command failed, aborting command manually\n");
 	nvme_qpair_manual_complete_tracker(tr->qpair, tr,
 	    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, 0, TRUE);
 }
 
 /*
  * Callout handler armed by nvme_qpair_submit_tracker(); arg is the tracker
  *  whose command timed out.
  *
  * An ABORT command is sent only when aborts are enabled and the controller
  *  is not reporting fatal status (csts.cfs); otherwise the controller is
  *  reset.
  */
 static void
 nvme_timeout(void *arg)
 {
 	struct nvme_tracker	*tr = arg;
 	struct nvme_qpair	*qpair = tr->qpair;
 	struct nvme_controller	*ctrlr = qpair->ctrlr;
 	union csts_register	csts;
 
 	/* Read csts to get value of cfs - controller fatal status. */
 	csts.raw = nvme_mmio_read_4(ctrlr, csts);
 
 	if (!ctrlr->enable_aborts || csts.bits.cfs != 0) {
 		nvme_ctrlr_reset(ctrlr);
 		return;
 	}
 
 	nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id,
 	    nvme_abort_complete, tr);
 }
 
 /*
  * Place a tracker's command on the submission queue and ring the
  *  submission queue tail doorbell.  The tracker is registered in act_tr[]
  *  under its command id, and the timeout callout is armed when the request
  *  asks for one.
  *
  * The caller must hold qpair->lock (asserted below).
  */
 void
 nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
 {
 	struct nvme_request	*req;
 	struct nvme_controller	*ctrlr;
 
 	mtx_assert(&qpair->lock, MA_OWNED);
 
 	req = tr->req;
 	req->cmd.cid = tr->cid;
 	qpair->act_tr[tr->cid] = tr;
 	ctrlr = qpair->ctrlr;
 
 	/*
 	 * Unbraced if: exactly one of the two callout calls below is
 	 *  compiled in and forms the body of this statement.
 	 */
 	if (req->timeout)
 #if __FreeBSD_version >= 800030
 		callout_reset_curcpu(&tr->timer, ctrlr->timeout_period * hz,
 		    nvme_timeout, tr);
 #else
 		callout_reset(&tr->timer, ctrlr->timeout_period * hz,
 		    nvme_timeout, tr);
 #endif
 
 	/* Copy the command from the tracker to the submission queue. */
 	memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));
 
 	/* Advance the tail, wrapping at the end of the ring. */
 	if (++qpair->sq_tail == qpair->num_entries)
 		qpair->sq_tail = 0;
 
 	/* Write barrier between the command copy and the doorbell write. */
 	wmb();
 	nvme_mmio_write_4(qpair->ctrlr, doorbell[qpair->id].sq_tdbl,
 	    qpair->sq_tail);
 
 	qpair->num_cmds++;
 }
 
 /*
  * busdma callback for a request's payload mapping; arg is the tracker.
  *  Fills in the command's PRP entries from the segment list and submits
  *  the tracker.
  *
  * If the mapping failed, the error is logged and nothing is submitted;
  *  the caller is responsible for detecting the error status and failing
  *  the tracker manually.
  */
 static void
 nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 {
 	struct nvme_tracker 	*tr = arg;
 	struct nvme_command	*cmd;
 	uint32_t		i;
 
 	if (error != 0) {
 		nvme_printf(tr->qpair->ctrlr,
 		    "nvme_payload_map err %d\n", error);
 		return;
 	}
 
 	/*
 	 * PAGE_SIZE was specified for alignment and max segment size when
 	 *  the bus dma tags were created, so each segment can be transferred
 	 *  directly to its associated PRP entry.
 	 */
 	cmd = &tr->req->cmd;
 	cmd->prp1 = seg[0].ds_addr;
 
 	if (nseg > 2) {
 		/* prp2 points at the PRP list holding segments 1..nseg-1. */
 		cmd->prp2 = (uint64_t)tr->prp_bus_addr;
 		for (i = 1; i < nseg; i++)
 			tr->prp[i - 1] = (uint64_t)seg[i].ds_addr;
 	} else if (nseg == 2) {
 		cmd->prp2 = seg[1].ds_addr;
 	} else {
 		/*
 		 * prp2 should not be used by the controller since there is
 		 *  only one segment, but set to 0 just to be safe.
 		 */
 		cmd->prp2 = 0;
 	}
 
 	nvme_qpair_submit_tracker(tr->qpair, tr);
 }
 
 /*
  * Submit a request on a qpair with qpair->lock already held (asserted).
  *
  * If no tracker is free or the qpair is disabled, the request is either
  *  handed to the failed-request path (controller failed) or appended to
  *  queued_req.  Otherwise a tracker is taken from free_tr, moved to
  *  outstanding_tr, and the payload is DMA-mapped according to the request
  *  type; the mapping callback (nvme_payload_map) performs the actual
  *  submission.  NULL-type requests are submitted directly.
  *
  * On a mapping error the lock is dropped temporarily while the tracker is
  *  failed, and is held again on return.
  */
 static void
 _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 {
 	struct nvme_tracker	*tr;
 	int			err = 0;
 
 	mtx_assert(&qpair->lock, MA_OWNED);
 
 	tr = TAILQ_FIRST(&qpair->free_tr);
 	req->qpair = qpair;
 
 	if (tr == NULL || !qpair->is_enabled) {
 		/*
 		 * No tracker is available, or the qpair is disabled due to
 		 *  an in-progress controller-level reset or controller
 		 *  failure.
 		 */
 
 		if (qpair->ctrlr->is_failed) {
 			/*
 			 * The controller has failed.  Post the request to a
 			 *  task where it will be aborted, so that we do not
 			 *  invoke the request's callback in the context
 			 *  of the submission.
 			 */
 			nvme_ctrlr_post_failed_request(qpair->ctrlr, req);
 		} else {
 			/*
 			 * Put the request on the qpair's request queue to be
 			 *  processed when a tracker frees up via a command
 			 *  completion or when the controller reset is
 			 *  completed.
 			 */
 			STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
 		}
 		return;
 	}
 
 	/* Claim the tracker and attach the request to it. */
 	TAILQ_REMOVE(&qpair->free_tr, tr, tailq);
 	TAILQ_INSERT_TAIL(&qpair->outstanding_tr, tr, tailq);
 	tr->req = req;
 
 	switch (req->type) {
 	case NVME_REQUEST_VADDR:
 		KASSERT(req->payload_size <= qpair->ctrlr->max_xfer_size,
 		    ("payload_size (%d) exceeds max_xfer_size (%d)\n",
 		    req->payload_size, qpair->ctrlr->max_xfer_size));
 		err = bus_dmamap_load(tr->qpair->dma_tag_payload,
 		    tr->payload_dma_map, req->u.payload, req->payload_size,
 		    nvme_payload_map, tr, 0);
 		if (err != 0)
 			nvme_printf(qpair->ctrlr,
 			    "bus_dmamap_load returned 0x%x!\n", err);
 		break;
 	case NVME_REQUEST_NULL:
 		/* No payload to map; submit straight away. */
 		nvme_qpair_submit_tracker(tr->qpair, tr);
 		break;
 #ifdef NVME_UNMAPPED_BIO_SUPPORT
 	case NVME_REQUEST_BIO:
 		KASSERT(req->u.bio->bio_bcount <= qpair->ctrlr->max_xfer_size,
 		    ("bio->bio_bcount (%jd) exceeds max_xfer_size (%d)\n",
 		    (intmax_t)req->u.bio->bio_bcount,
 		    qpair->ctrlr->max_xfer_size));
 		err = bus_dmamap_load_bio(tr->qpair->dma_tag_payload,
 		    tr->payload_dma_map, req->u.bio, nvme_payload_map, tr, 0);
 		if (err != 0)
 			nvme_printf(qpair->ctrlr,
 			    "bus_dmamap_load_bio returned 0x%x!\n", err);
 		break;
 #endif
 	default:
 		panic("unknown nvme request type 0x%x\n", req->type);
 		break;
 	}
 
 	if (err != 0) {
 		/*
 		 * The dmamap operation failed, so we manually fail the
 		 *  tracker here with DATA_TRANSFER_ERROR status.
 		 *
 		 * nvme_qpair_manual_complete_tracker must not be called
 		 *  with the qpair lock held.
 		 */
 		mtx_unlock(&qpair->lock);
 		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
 		    NVME_SC_DATA_TRANSFER_ERROR, 1 /* do not retry */, TRUE);
 		mtx_lock(&qpair->lock);
 	}
 }
 
 /*
  * Public submission entry point: takes qpair->lock around the internal
  *  submit routine, so it must be called without the lock held.
  */
 void
 nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 {
 
 	mtx_lock(&qpair->lock);
 	_nvme_qpair_submit_request(qpair, req);
 	mtx_unlock(&qpair->lock);
 }
 
 /*
  * Mark the qpair enabled.  While is_enabled is FALSE, submissions are
  *  queued or failed and completion processing returns early.
  */
 static void
 nvme_qpair_enable(struct nvme_qpair *qpair)
 {
 
 	qpair->is_enabled = TRUE;
 }
 
 /*
  * Return the queue pair's ring state to its initial values: head and tail
  *  indices at zero, expected phase at 1, and both rings zero-filled.
  */
 void
 nvme_qpair_reset(struct nvme_qpair *qpair)
 {
 
 	qpair->cq_head = 0;
 	qpair->sq_tail = 0;
 	qpair->sq_head = 0;
 
 	/*
 	 * On the first pass through the completion queue the hardware sets
 	 *  the phase bit of each completion to 1, so 1 is the value that
 	 *  marks a finished entry.  The expected value is toggled every time
 	 *  the completion queue rolls over.
 	 */
 	qpair->phase = 1;
 
 	memset(qpair->cmd, 0, qpair->num_entries * sizeof(struct nvme_command));
 	memset(qpair->cpl, 0,
 	    qpair->num_entries * sizeof(struct nvme_completion));
 }
 
 /*
  * Enable the admin queue pair after first failing every admin command
  *  that is still outstanding.
  */
 void
 nvme_admin_qpair_enable(struct nvme_qpair *qpair)
 {
 	struct nvme_tracker		*tr;
 	struct nvme_tracker		*tr_temp;
 
 	/*
 	 * Manually abort each outstanding admin command.  Do not retry
 	 *  admin commands found here, since they will be left over from
 	 *  a controller reset and it's likely the context in which the
 	 *  command was issued no longer applies.
 	 */
 	TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
 		nvme_printf(qpair->ctrlr,
 		    "aborting outstanding admin command\n");
 		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
 		    NVME_SC_ABORTED_BY_REQUEST, 1 /* do not retry */, TRUE);
 	}
 
 	nvme_qpair_enable(qpair);
 }
 
 /*
  * Enable an I/O queue pair: manually abort whatever is still outstanding,
  *  mark the qpair enabled, then resubmit every request that was parked on
  *  queued_req.
  */
 void
 nvme_io_qpair_enable(struct nvme_qpair *qpair)
 {
 	STAILQ_HEAD(, nvme_request)	temp;
 	struct nvme_tracker		*tr;
 	struct nvme_tracker		*tr_temp;
 	struct nvme_request		*req;
 
 	/*
 	 * Manually abort each outstanding I/O.  This normally results in a
 	 *  retry, unless the retry count on the associated request has
 	 *  reached its limit.
 	 *
 	 * This loop runs without qpair->lock; the completion path takes the
 	 *  lock itself.
 	 */
 	TAILQ_FOREACH_SAFE(tr, &qpair->outstanding_tr, tailq, tr_temp) {
 		nvme_printf(qpair->ctrlr, "aborting outstanding i/o\n");
 		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
 		    NVME_SC_ABORTED_BY_REQUEST, 0, TRUE);
 	}
 
 	mtx_lock(&qpair->lock);
 
 	nvme_qpair_enable(qpair);
 
 	/*
 	 * Move the queued requests onto a private list first; resubmission
 	 *  may append to queued_req again when no tracker is free.
 	 */
 	STAILQ_INIT(&temp);
 	STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);
 
 	while (!STAILQ_EMPTY(&temp)) {
 		req = STAILQ_FIRST(&temp);
 		STAILQ_REMOVE_HEAD(&temp, stailq);
 		nvme_printf(qpair->ctrlr, "resubmitting queued i/o\n");
 		nvme_qpair_print_command(qpair, &req->cmd);
 		_nvme_qpair_submit_request(qpair, req);
 	}
 
 	mtx_unlock(&qpair->lock);
 }
 
 /*
  * Mark the qpair disabled and stop the timeout callout of every
  *  outstanding tracker.  The is_enabled flag is cleared before the lock is
  *  taken; the callouts are stopped under qpair->lock.
  */
 static void
 nvme_qpair_disable(struct nvme_qpair *qpair)
 {
 	struct nvme_tracker *tr;
 
 	qpair->is_enabled = FALSE;
 	mtx_lock(&qpair->lock);
 	TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
 		callout_stop(&tr->timer);
 	mtx_unlock(&qpair->lock);
 }
 
 /*
  * Disable the admin queue pair, then abort any outstanding async event
  *  requests.
  */
 void
 nvme_admin_qpair_disable(struct nvme_qpair *qpair)
 {
 
 	nvme_qpair_disable(qpair);
 	nvme_admin_qpair_abort_aers(qpair);
 }
 
 /* Disable an I/O queue pair; no I/O-specific work beyond the common path. */
 void
 nvme_io_qpair_disable(struct nvme_qpair *qpair)
 {
 
 	nvme_qpair_disable(qpair);
 }
 
 /*
  * Fail everything pending on a qpair: first the requests waiting on
  *  queued_req, then every tracker on outstanding_tr, all with status
  *  ABORTED - BY REQUEST.  Outstanding trackers are completed with the
  *  do-not-retry bit set.
  *
  * qpair->lock protects the list manipulation but is dropped around each
  *  manual completion call.
  */
 void
 nvme_qpair_fail(struct nvme_qpair *qpair)
 {
 	struct nvme_tracker		*tr;
 	struct nvme_request		*req;
 
 	mtx_lock(&qpair->lock);
 
 	while (!STAILQ_EMPTY(&qpair->queued_req)) {
 		req = STAILQ_FIRST(&qpair->queued_req);
 		STAILQ_REMOVE_HEAD(&qpair->queued_req, stailq);
 		nvme_printf(qpair->ctrlr, "failing queued i/o\n");
 		mtx_unlock(&qpair->lock);
 		nvme_qpair_manual_complete_request(qpair, req, NVME_SCT_GENERIC,
 		    NVME_SC_ABORTED_BY_REQUEST, TRUE);
 		mtx_lock(&qpair->lock);
 	}
 
 	/* Manually abort each outstanding I/O. */
 	while (!TAILQ_EMPTY(&qpair->outstanding_tr)) {
 		tr = TAILQ_FIRST(&qpair->outstanding_tr);
 		/*
 		 * Do not remove the tracker.  The manual completion path
 		 *  ends in nvme_qpair_complete_tracker(), which moves it
 		 *  from outstanding_tr to free_tr for us.
 		 */
 		nvme_printf(qpair->ctrlr, "failing outstanding i/o\n");
 		mtx_unlock(&qpair->lock);
 		nvme_qpair_manual_complete_tracker(qpair, tr, NVME_SCT_GENERIC,
 		    NVME_SC_ABORTED_BY_REQUEST, 1 /* do not retry */, TRUE);
 		mtx_lock(&qpair->lock);
 	}
 
 	mtx_unlock(&qpair->lock);
 }
 
Index: projects/release-pkg/sys/geom/eli/g_eli.c
===================================================================
--- projects/release-pkg/sys/geom/eli/g_eli.c	(revision 293335)
+++ projects/release-pkg/sys/geom/eli/g_eli.c	(revision 293336)
@@ -1,1348 +1,1267 @@
 /*-
  * Copyright (c) 2005-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/cons.h>
 #include <sys/kernel.h>
 #include <sys/linker.h>
 #include <sys/module.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bio.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/eventhandler.h>
 #include <sys/kthread.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/uio.h>
 #include <sys/vnode.h>
 
 #include <vm/uma.h>
 
 #include <geom/geom.h>
 #include <geom/eli/g_eli.h>
 #include <geom/eli/pkcs5v2.h>
 
 FEATURE(geom_eli, "GEOM crypto module");
 
 MALLOC_DEFINE(M_ELI, "eli data", "GEOM_ELI Data");
 
 /*
  * kern.geom.eli sysctl tree.  Each knob below is backed by the variable
  * declared immediately before it; the description string of each SYSCTL_*
  * entry states what the knob controls.
  */
 SYSCTL_DECL(_kern_geom);
 SYSCTL_NODE(_kern_geom, OID_AUTO, eli, CTLFLAG_RW, 0, "GEOM_ELI stuff");
 static int g_eli_version = G_ELI_VERSION;
 SYSCTL_INT(_kern_geom_eli, OID_AUTO, version, CTLFLAG_RD, &g_eli_version, 0,
     "GELI version");
 int g_eli_debug = 0;
 SYSCTL_INT(_kern_geom_eli, OID_AUTO, debug, CTLFLAG_RWTUN, &g_eli_debug, 0,
     "Debug level");
 /* A value of 0 makes g_eli_taste() return without tasting anything. */
 static u_int g_eli_tries = 3;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, tries, CTLFLAG_RWTUN, &g_eli_tries, 0,
     "Number of tries for entering the passphrase");
 /* Passed to cngets() as the echo mode when prompting at boot. */
 static u_int g_eli_visible_passphrase = GETS_NOECHO;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, visible_passphrase, CTLFLAG_RWTUN,
     &g_eli_visible_passphrase, 0,
     "Visibility of passphrase prompt (0 = invisible, 1 = visible, 2 = asterisk)");
 u_int g_eli_overwrites = G_ELI_OVERWRITES;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, overwrites, CTLFLAG_RWTUN, &g_eli_overwrites,
     0, "Number of times on-disk keys should be overwritten when destroying them");
 /* 0 means "one worker per CPU" (see g_eli_create()). */
 static u_int g_eli_threads = 0;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, threads, CTLFLAG_RWTUN, &g_eli_threads, 0,
     "Number of threads doing crypto work");
 u_int g_eli_batch = 0;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, batch, CTLFLAG_RWTUN, &g_eli_batch, 0,
     "Use crypto operations batching");
 
 /*
  * Passphrase cached during boot, in order to be more user-friendly if
  * there are multiple providers using the same passphrase.
  *
  * The cache is filled either from the kernel environment
  * (fetch_loader_passphrase()) or from the last passphrase typed at the
  * console (g_eli_taste()), and is zeroed by the mountroot event handler.
  * boot_passcache is exported read-only through sysctl and is set through
  * the loader tunable of the same name.
  */
 static char cached_passphrase[256];
 static u_int g_eli_boot_passcache = 1;
 TUNABLE_INT("kern.geom.eli.boot_passcache", &g_eli_boot_passcache);
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, boot_passcache, CTLFLAG_RD,
     &g_eli_boot_passcache, 0,
     "Passphrases are cached during boot process for possible reuse");
 static void
 fetch_loader_passphrase(void * dummy)
 {
 	char * env_passphrase;
 
 	KASSERT(dynamic_kenv, ("need dynamic kenv"));
 
 	if ((env_passphrase = kern_getenv("kern.geom.eli.passphrase")) != NULL) {
 		/* Extract passphrase from the environment. */
 		strlcpy(cached_passphrase, env_passphrase,
 		    sizeof(cached_passphrase));
 		freeenv(env_passphrase);
 
 		/* Wipe the passphrase from the environment. */
 		kern_unsetenv("kern.geom.eli.passphrase");
 	}
 }
 SYSINIT(geli_fetch_loader_passphrase, SI_SUB_KMEM + 1, SI_ORDER_ANY,
     fetch_loader_passphrase, NULL);
 /*
  * mountroot event handler: overwrite the whole boot-time passphrase
  * cache with zeros.
  */
 static void
 zero_boot_passcache(void *dummy)
 {
 	memset(cached_passphrase, 0, sizeof(cached_passphrase));
 }
 EVENTHANDLER_DEFINE(mountroot, zero_boot_passcache, NULL, 0);
 
 /* Event handler tag; initialised to NULL here. */
 static eventhandler_tag g_eli_pre_sync = NULL;
 
 /* Forward declarations for the class methods wired into g_eli_class. */
 static int g_eli_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp);
 static void g_eli_init(struct g_class *mp);
 static void g_eli_fini(struct g_class *mp);
 
 static g_taste_t g_eli_taste;
 static g_dumpconf_t g_eli_dumpconf;
 
 /*
  * GEOM class descriptor for ELI: control requests go to g_eli_config(),
  * tasting to g_eli_taste(), geom destruction to g_eli_destroy_geom(), and
  * class load/unload to g_eli_init()/g_eli_fini().
  */
 struct g_class g_eli_class = {
 	.name = G_ELI_CLASS_NAME,
 	.version = G_VERSION,
 	.ctlreq = g_eli_config,
 	.taste = g_eli_taste,
 	.destroy_geom = g_eli_destroy_geom,
 	.init = g_eli_init,
 	.fini = g_eli_fini
 };
 
 
 /*
  * Code paths:
  * BIO_READ:
  *	g_eli_start -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
  * BIO_WRITE:
  *	g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
  */
 
 
 /*
  * EAGAIN from crypto(9) means, that we were probably balanced to another crypto
  * accelerator or something like this.
  * The function updates the SID and rerun the operation.
  */
 int
 g_eli_crypto_rerun(struct cryptop *crp)
 {
 	struct g_eli_softc *sc;
 	struct g_eli_worker *wr;
 	struct bio *bp;
 	int error;
 
 	bp = (struct bio *)crp->crp_opaque;
 	sc = bp->bio_to->geom->softc;
 	LIST_FOREACH(wr, &sc->sc_workers, w_next) {
 		if (wr->w_number == bp->bio_pflags)
 			break;
 	}
 	KASSERT(wr != NULL, ("Invalid worker (%u).", bp->bio_pflags));
 	G_ELI_DEBUG(1, "Rerunning crypto %s request (sid: %ju -> %ju).",
 	    bp->bio_cmd == BIO_READ ? "READ" : "WRITE", (uintmax_t)wr->w_sid,
 	    (uintmax_t)crp->crp_sid);
 	wr->w_sid = crp->crp_sid;
 	crp->crp_etype = 0;
 	error = crypto_dispatch(crp);
 	if (error == 0)
 		return (0);
 	G_ELI_DEBUG(1, "%s: crypto_dispatch() returned %d.", __func__, error);
 	crp->crp_etype = error;
 	return (error);
 }
 
 /*
  * The function is called after reading encrypted data from the provider.
  *
  * g_eli_start -> g_eli_crypto_read -> g_io_request -> G_ELI_READ_DONE -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
  *
  * Once every child bio has come back, a failed parent is delivered with
  * its error and a successful one is queued for a worker thread.
  */
 void
 g_eli_read_done(struct bio *bp)
 {
 	struct g_eli_softc *sc;
 	struct bio *parent;
 
 	G_ELI_LOGREQ(2, bp, "Request done.");
 	parent = bp->bio_parent;
 	/* Only the first child error is kept in the parent. */
 	if (bp->bio_error != 0 && parent->bio_error == 0)
 		parent->bio_error = bp->bio_error;
 	g_destroy_bio(bp);
 
 	/* Do we have all sectors already? */
 	parent->bio_inbed++;
 	if (parent->bio_inbed < parent->bio_children)
 		return;
 
 	sc = parent->bio_to->geom->softc;
 	if (parent->bio_error == 0) {
 		/* Hand the parent to a worker via the softc queue. */
 		mtx_lock(&sc->sc_queue_mtx);
 		bioq_insert_tail(&sc->sc_queue, parent);
 		mtx_unlock(&sc->sc_queue_mtx);
 		wakeup(sc);
 		return;
 	}
 
 	/* The read failed: release driver data and deliver the error. */
 	G_ELI_LOGREQ(0, parent, "%s() failed (error=%d)", __func__,
 	    parent->bio_error);
 	parent->bio_completed = 0;
 	if (parent->bio_driver2 != NULL) {
 		free(parent->bio_driver2, M_ELI);
 		parent->bio_driver2 = NULL;
 	}
 	g_io_deliver(parent, parent->bio_error);
 	atomic_subtract_int(&sc->sc_inflight, 1);
 }
 
 /*
  * The function is called after we encrypt and write data.
  *
  * g_eli_start -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> G_ELI_WRITE_DONE -> g_io_deliver
  *
  * Once every child bio has come back, the parent is delivered upwards,
  * with bio_completed set to bio_length on success and to 0 on error.
  */
 void
 g_eli_write_done(struct bio *bp)
 {
 	struct g_eli_softc *sc;
 	struct bio *parent;
 
 	G_ELI_LOGREQ(2, bp, "Request done.");
 	parent = bp->bio_parent;
 	/* Only the first child error is kept in the parent. */
 	if (bp->bio_error != 0 && parent->bio_error == 0)
 		parent->bio_error = bp->bio_error;
 	g_destroy_bio(bp);
 
 	/* Do we have all sectors already? */
 	parent->bio_inbed++;
 	if (parent->bio_inbed < parent->bio_children)
 		return;
 
 	free(parent->bio_driver2, M_ELI);
 	parent->bio_driver2 = NULL;
 	if (parent->bio_error == 0) {
 		parent->bio_completed = parent->bio_length;
 	} else {
 		G_ELI_LOGREQ(0, parent, "%s() failed (error=%d)", __func__,
 		    parent->bio_error);
 		parent->bio_completed = 0;
 	}
 
 	/* Write is finished, send it up. */
 	sc = parent->bio_to->geom->softc;
 	g_io_deliver(parent, parent->bio_error);
 	atomic_subtract_int(&sc->sc_inflight, 1);
 }
 
 /*
  * This function should never be called, but GEOM requires every geom to
  * have an ->orphan() method set, so it is installed as a placeholder
  * that panics with the name of the offending geom.
  */
 static void
 g_eli_orphan_spoil_assert(struct g_consumer *cp)
 {
 	panic("Function %s() called for %s.", __func__, cp->geom->name);
 }
 
 /*
  * Orphan/spoil method for attached devices: force destruction of the
  * device unless its softc is already gone.
  */
 static void
 g_eli_orphan(struct g_consumer *cp)
 {
 	struct g_eli_softc *sc;
 
 	g_topology_assert();
 
 	sc = cp->geom->softc;
 	if (sc != NULL)
 		g_eli_destroy(sc, TRUE);
 }
 
 /*
  * GEOM start method: entry point for every bio sent to the decrypted
  * provider.
  *
  * BIO_READ:
  *	G_ELI_START -> g_eli_crypto_read -> g_io_request -> g_eli_read_done -> g_eli_crypto_run -> g_eli_crypto_read_done -> g_io_deliver
  * BIO_WRITE:
  *	G_ELI_START -> g_eli_crypto_run -> g_eli_crypto_write_done -> g_io_request -> g_eli_write_done -> g_io_deliver
  *
  * Unsupported commands are delivered back with EOPNOTSUPP, and a failed
  * clone with ENOMEM.
  */
 static void
 g_eli_start(struct bio *bp)
 {
 	struct g_eli_softc *sc;
 	struct g_consumer *cp;
 	struct bio *cbp;
 
 	sc = bp->bio_to->geom->softc;
 	KASSERT(sc != NULL,
 	    ("Provider's error should be set (error=%d)(device=%s).",
 	    bp->bio_to->error, bp->bio_to->name));
 	G_ELI_LOGREQ(2, bp, "Request received.");
 
 	/* First pass: filter out the commands we do not handle. */
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 	case BIO_WRITE:
 	case BIO_GETATTR:
 	case BIO_FLUSH:
 		break;
 	case BIO_DELETE:
 		/*
 		 * If the user hasn't set the NODELETE flag, we just pass
 		 * it down the stack and let the layers beneath us do (or
 		 * not) whatever they do with it.  If they have, we
 		 * reject it.  A possible extension would be an
 		 * additional flag to take it as a hint to shred the data
 		 * with [multiple?] overwrites.
 		 */
 		if (!(sc->sc_flags & G_ELI_FLAG_NODELETE))
 			break;
 		/* FALLTHROUGH: NODELETE is set, reject the request. */
 	default:
 		g_io_deliver(bp, EOPNOTSUPP);
 		return;
 	}
 	cbp = g_clone_bio(bp);
 	if (cbp == NULL) {
 		g_io_deliver(bp, ENOMEM);
 		return;
 	}
 	/* Remember the clone and mark the bio as not yet seen by a worker. */
 	bp->bio_driver1 = cbp;
 	bp->bio_pflags = G_ELI_NEW_BIO;
 	/* Second pass: dispatch by command. */
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		/* Without authentication the read is started right here. */
 		if (!(sc->sc_flags & G_ELI_FLAG_AUTH)) {
 			g_eli_crypto_read(sc, bp, 0);
 			break;
 		}
 		/* FALLTHROUGH */
 	case BIO_WRITE:
 		/* Queue the bio and wake the threads sleeping on sc. */
 		mtx_lock(&sc->sc_queue_mtx);
 		bioq_insert_tail(&sc->sc_queue, bp);
 		mtx_unlock(&sc->sc_queue_mtx);
 		wakeup(sc);
 		break;
 	case BIO_GETATTR:
 	case BIO_FLUSH:
 	case BIO_DELETE:
 		/* No crypto involved: pass the clone to the first consumer. */
 		cbp->bio_done = g_std_done;
 		cp = LIST_FIRST(&sc->sc_geom->consumer);
 		cbp->bio_to = cp->provider;
 		G_ELI_LOGREQ(2, cbp, "Sending request.");
 		g_io_request(cbp, cp);
 		break;
 	}
 }
 
 /*
  * Open a crypto(9) session for worker `wr' and store its ID in wr->w_sid.
  *
  * The session describes the encryption algorithm and key taken from the
  * softc and, when G_ELI_FLAG_AUTH is set, chains the authentication
  * algorithm and key behind it.  Returns the crypto_newsession() result.
  */
 static int
 g_eli_newsession(struct g_eli_worker *wr)
 {
 	struct g_eli_softc *sc;
 	struct cryptoini crie, cria;
 	int error;
 
 	sc = wr->w_softc;
 
 	bzero(&crie, sizeof(crie));
 	crie.cri_alg = sc->sc_ealgo;
 	crie.cri_klen = sc->sc_ekeylen;
 	/* For AES-XTS the key length passed to crypto(9) is doubled. */
 	if (sc->sc_ealgo == CRYPTO_AES_XTS)
 		crie.cri_klen <<= 1;
 	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0) {
 		/* Held key; released again with g_eli_key_drop() below. */
 		crie.cri_key = g_eli_key_hold(sc, 0,
 		    LIST_FIRST(&sc->sc_geom->consumer)->provider->sectorsize);
 	} else {
 		crie.cri_key = sc->sc_ekey;
 	}
 	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
 		bzero(&cria, sizeof(cria));
 		cria.cri_alg = sc->sc_aalgo;
 		cria.cri_klen = sc->sc_akeylen;
 		cria.cri_key = sc->sc_akey;
 		crie.cri_next = &cria;
 	}
 
 	switch (sc->sc_crypto) {
 	case G_ELI_CRYPTO_SW:
 		error = crypto_newsession(&wr->w_sid, &crie,
 		    CRYPTOCAP_F_SOFTWARE);
 		break;
 	case G_ELI_CRYPTO_HW:
 		error = crypto_newsession(&wr->w_sid, &crie,
 		    CRYPTOCAP_F_HARDWARE);
 		break;
 	case G_ELI_CRYPTO_UNKNOWN:
 		/*
 		 * Backend not decided yet: try hardware first and fall back
 		 * to software.  The choice is recorded in sc_crypto under
 		 * the queue mutex, unless it has been set in the meantime.
 		 * Note that sc_crypto becomes G_ELI_CRYPTO_SW even when the
 		 * software attempt fails; the error is still returned.
 		 */
 		error = crypto_newsession(&wr->w_sid, &crie,
 		    CRYPTOCAP_F_HARDWARE);
 		if (error == 0) {
 			mtx_lock(&sc->sc_queue_mtx);
 			if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN)
 				sc->sc_crypto = G_ELI_CRYPTO_HW;
 			mtx_unlock(&sc->sc_queue_mtx);
 		} else {
 			error = crypto_newsession(&wr->w_sid, &crie,
 			    CRYPTOCAP_F_SOFTWARE);
 			mtx_lock(&sc->sc_queue_mtx);
 			if (sc->sc_crypto == G_ELI_CRYPTO_UNKNOWN)
 				sc->sc_crypto = G_ELI_CRYPTO_SW;
 			mtx_unlock(&sc->sc_queue_mtx);
 		}
 		break;
 	default:
 		panic("%s: invalid condition", __func__);
 	}
 
 	if ((sc->sc_flags & G_ELI_FLAG_FIRST_KEY) != 0)
 		g_eli_key_drop(sc, crie.cri_key);
 
 	return (error);
 }
 
 /*
  * Release the crypto(9) session owned by the given worker.
  */
 static void
 g_eli_freesession(struct g_eli_worker *wr)
 {
 	crypto_freesession(wr->w_sid);
 }
 
 /*
  * Empty the softc queue, delivering every bio back with ENXIO.
  * Must be called with sc_queue_mtx held; every queued bio is expected to
  * still be marked G_ELI_NEW_BIO.
  */
 static void
 g_eli_cancel(struct g_eli_softc *sc)
 {
 	struct bio *bp;
 
 	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
 
 	for (;;) {
 		bp = bioq_takefirst(&sc->sc_queue);
 		if (bp == NULL)
 			break;
 		KASSERT(bp->bio_pflags == G_ELI_NEW_BIO,
 		    ("Not new bio when canceling (bp=%p).", bp));
 		g_io_deliver(bp, ENXIO);
 	}
 }
 
 /*
  * Dequeue the next bio a worker may process, or return NULL if there is
  * none.  Must be called with sc_queue_mtx held.
  *
  * Normally this is simply the head of the queue.  While the device is
  * suspended, bios still marked G_ELI_NEW_BIO are skipped and the first
  * bio that is not new is taken instead.
  */
 static struct bio *
 g_eli_takefirst(struct g_eli_softc *sc)
 {
 	struct bio *bp;
 
 	mtx_assert(&sc->sc_queue_mtx, MA_OWNED);
 
 	if ((sc->sc_flags & G_ELI_FLAG_SUSPEND) == 0)
 		return (bioq_takefirst(&sc->sc_queue));
 
 	/* Device suspended, so we skip new I/O requests. */
 	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
 		if (bp->bio_pflags != G_ELI_NEW_BIO) {
 			bioq_remove(&sc->sc_queue, bp);
 			return (bp);
 		}
 	}
 	return (NULL);
 }
 
 /*
  * This is the main function for kernel worker thread when we don't have
  * hardware acceleration and we have to do cryptography in software.
  * Dedicated thread is needed, so we don't slow down g_up/g_down GEOM
  * threads with crypto work.
  *
  * The thread loops forever taking bios off the softc queue.  With an empty
  * queue it either exits (G_ELI_FLAG_DESTROY), parks itself
  * (G_ELI_FLAG_SUSPEND) or sleeps until woken up.
  */
 static void
 g_eli_worker(void *arg)
 {
 	struct g_eli_softc *sc;
 	struct g_eli_worker *wr;
 	struct bio *bp;
 	int error;
 
 	wr = arg;
 	sc = wr->w_softc;
 #ifdef SMP
 	/* Before sched_bind() to a CPU, wait for all CPUs to go on-line. */
 	if (sc->sc_cpubind) {
 		while (!smp_started)
 			tsleep(wr, 0, "geli:smp", hz / 4);
 	}
 #endif
 	thread_lock(curthread);
 	sched_prio(curthread, PUSER);
 	if (sc->sc_cpubind)
 		sched_bind(curthread, wr->w_number % mp_ncpus);
 	thread_unlock(curthread);
 
 	G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm);
 
 	for (;;) {
 		mtx_lock(&sc->sc_queue_mtx);
 again:
 		bp = g_eli_takefirst(sc);
 		if (bp == NULL) {
 			if (sc->sc_flags & G_ELI_FLAG_DESTROY) {
 				/*
 				 * Device is going away: fail whatever is
 				 * still queued, unlink and free this worker,
 				 * and wake up whoever sleeps on sc_workers
 				 * waiting for the list to drain.
 				 */
 				g_eli_cancel(sc);
 				LIST_REMOVE(wr, w_next);
 				g_eli_freesession(wr);
 				free(wr, M_ELI);
 				G_ELI_DEBUG(1, "Thread %s exiting.",
 				    curthread->td_proc->p_comm);
 				wakeup(&sc->sc_workers);
 				mtx_unlock(&sc->sc_queue_mtx);
 				kproc_exit(0);
 			}
 			while (sc->sc_flags & G_ELI_FLAG_SUSPEND) {
 				if (sc->sc_inflight > 0) {
 					G_ELI_DEBUG(0, "inflight=%d",
 					    sc->sc_inflight);
 					/*
 					 * We still have inflight BIOs, so
 					 * sleep and retry.
 					 */
 					msleep(sc, &sc->sc_queue_mtx, PRIBIO,
 					    "geli:inf", hz / 5);
 					goto again;
 				}
 				/*
 				 * Suspend requested, mark the worker as
 				 * suspended and go to sleep.
 				 */
 				if (wr->w_active) {
 					g_eli_freesession(wr);
 					wr->w_active = FALSE;
 				}
 				wakeup(&sc->sc_workers);
 				msleep(sc, &sc->sc_queue_mtx, PRIBIO,
 				    "geli:suspend", 0);
 				/*
 				 * Woken up: if the suspend flag has been
 				 * cleared, open a fresh crypto session
 				 * before going back to work.
 				 */
 				if (!wr->w_active &&
 				    !(sc->sc_flags & G_ELI_FLAG_SUSPEND)) {
 					error = g_eli_newsession(wr);
 					KASSERT(error == 0,
 					    ("g_eli_newsession() failed on resume (error=%d)",
 					    error));
 					wr->w_active = TRUE;
 				}
 				goto again;
 			}
 			/*
 			 * Nothing to do.  NOTE(review): PDROP presumably
 			 * leaves the mutex unlocked on wake-up, which is why
 			 * `continue' goes back to the mtx_lock() at the top
 			 * of the loop -- confirm against msleep(9).
 			 */
 			msleep(sc, &sc->sc_queue_mtx, PDROP, "geli:w", 0);
 			continue;
 		}
 		/* A bio seen for the first time is counted as in flight. */
 		if (bp->bio_pflags == G_ELI_NEW_BIO)
 			atomic_add_int(&sc->sc_inflight, 1);
 		mtx_unlock(&sc->sc_queue_mtx);
 		if (bp->bio_pflags == G_ELI_NEW_BIO) {
 			/* New bio: reads start with I/O, writes with crypto. */
 			bp->bio_pflags = 0;
 			if (sc->sc_flags & G_ELI_FLAG_AUTH) {
 				if (bp->bio_cmd == BIO_READ)
 					g_eli_auth_read(sc, bp);
 				else
 					g_eli_auth_run(wr, bp);
 			} else {
 				if (bp->bio_cmd == BIO_READ)
 					g_eli_crypto_read(sc, bp, 1);
 				else
 					g_eli_crypto_run(wr, bp);
 			}
 		} else {
 			/* Bio queued again after its I/O: run the crypto step. */
 			if (sc->sc_flags & G_ELI_FLAG_AUTH)
 				g_eli_auth_run(wr, bp);
 			else
 				g_eli_crypto_run(wr, bp);
 		}
 	}
 }
 
-/*
- * Here we generate IV. It is unique for every sector.
- */
-void
-g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv,
-    size_t size)
-{
-	uint8_t off[8];
-
-	if ((sc->sc_flags & G_ELI_FLAG_NATIVE_BYTE_ORDER) != 0)
-		bcopy(&offset, off, sizeof(off));
-	else
-		le64enc(off, (uint64_t)offset);
-
-	switch (sc->sc_ealgo) {
-	case CRYPTO_AES_XTS:
-		bcopy(off, iv, sizeof(off));
-		bzero(iv + sizeof(off), size - sizeof(off));
-		break;
-	default:
-	    {
-		u_char hash[SHA256_DIGEST_LENGTH];
-		SHA256_CTX ctx;
-
-		/* Copy precalculated SHA256 context for IV-Key. */
-		bcopy(&sc->sc_ivctx, &ctx, sizeof(ctx));
-		SHA256_Update(&ctx, off, sizeof(off));
-		SHA256_Final(hash, &ctx);
-		bcopy(hash, iv, MIN(sizeof(hash), size));
-		break;
-	    }
-	}
-}
-
 int
 g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
     struct g_eli_metadata *md)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	u_char *buf = NULL;
 	int error;
 
 	g_topology_assert();
 
 	gp = g_new_geomf(mp, "eli:taste");
 	gp->start = g_eli_start;
 	gp->access = g_std_access;
 	/*
 	 * g_eli_read_metadata() is always called from the event thread.
 	 * Our geom is created and destroyed in the same event, so there
 	 * could be no orphan nor spoil event in the meantime.
 	 */
 	gp->orphan = g_eli_orphan_spoil_assert;
 	gp->spoiled = g_eli_orphan_spoil_assert;
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, pp);
 	if (error != 0)
 		goto end;
 	error = g_access(cp, 1, 0, 0);
 	if (error != 0)
 		goto end;
 	g_topology_unlock();
 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
 	    &error);
 	g_topology_lock();
 	if (buf == NULL)
 		goto end;
 	error = eli_metadata_decode(buf, md);
 	if (error != 0)
 		goto end;
 	/* Metadata was read and decoded successfully. */
 end:
 	if (buf != NULL)
 		g_free(buf);
 	if (cp->provider != NULL) {
 		if (cp->acr == 1)
 			g_access(cp, -1, 0, 0);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	return (error);
 }
 
 /*
  * Event handler posted by g_eli_access() when the provider sees its last
  * close and the user requested that the device be detached in that
  * situation.  `arg' is the geom to destroy.
  */
 static void
 g_eli_last_close(void *arg, int flags __unused)
 {
 	struct g_geom *gp;
 	char saved_name[64];
 	int error;
 
 	g_topology_assert();
 
 	gp = arg;
 	/* Copy the name first; it is printed after the destroy call. */
 	strlcpy(saved_name, gp->name, sizeof(saved_name));
 
 	error = g_eli_destroy(gp->softc, TRUE);
 	KASSERT(error == 0, ("Cannot detach %s on last close (error=%d).",
 	    saved_name, error));
 	G_ELI_DEBUG(0, "Detached %s on last close.", saved_name);
 }
 
 /*
  * GEOM access method used when detach-on-last-close or read-only handling
  * is needed.
  *
  * Opening for write returns EROFS on a read-only device; otherwise the
  * write open is remembered in G_ELI_FLAG_WOPEN.  When a request drops the
  * last reference and either G_ELI_FLAG_RW_DETACH or G_ELI_FLAG_WOPEN is
  * set, a g_eli_last_close() event is posted.  Returns 0 in all other
  * cases.
  */
 int
 g_eli_access(struct g_provider *pp, int dr, int dw, int de)
 {
 	struct g_geom *gp = pp->geom;
 	struct g_eli_softc *sc = gp->softc;
 
 	if (dw > 0) {
 		/* Deny write attempts on read-only devices. */
 		if ((sc->sc_flags & G_ELI_FLAG_RO) != 0)
 			return (EROFS);
 		/* Someone is opening us for write, we need to remember that. */
 		sc->sc_flags |= G_ELI_FLAG_WOPEN;
 		return (0);
 	}
 
 	/*
 	 * Automatically detach on last close if requested: all three
 	 * counters must reach zero (or below) with this request.
 	 */
 	if (pp->acr + dr <= 0 && pp->acw + dw <= 0 && pp->ace + de <= 0 &&
 	    (sc->sc_flags & (G_ELI_FLAG_RW_DETACH | G_ELI_FLAG_WOPEN)) != 0) {
 		g_post_event(g_eli_last_close, gp, M_WAITOK, NULL);
 	}
 	return (0);
 }
 
 /*
  * Report whether `cpu' is set in hlt_cpus_mask.  Kernels built without
  * SMP always get 0.
  */
 static int
 g_eli_cpu_is_disabled(int cpu)
 {
 	int disabled = 0;
 
 #ifdef SMP
 	disabled = CPU_ISSET(cpu, &hlt_cpus_mask);
 #endif
 	return (disabled);
 }
 
 /*
  * Create and attach an ELI device on top of provider `bpp'.
  *
  * req   - control request used for error reporting, or NULL (errors are
  *         then only logged at debug level 1).
  * md    - decoded on-disk metadata for the provider.
  * mkey  - decrypted Master Key, propagated into the softc.
  * nkey  - index of the Master Key that was used.
  *
  * On success the new geom is returned, with one worker thread per usable
  * CPU (or g_eli_threads workers) and a decrypted provider named
  * "<bpp name><G_ELI_SUFFIX>".  On failure everything set up so far is torn
  * down and NULL is returned.
  */
 struct g_geom *
 g_eli_create(struct gctl_req *req, struct g_class *mp, struct g_provider *bpp,
     const struct g_eli_metadata *md, const u_char *mkey, int nkey)
 {
 	struct g_eli_softc *sc;
 	struct g_eli_worker *wr;
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct g_consumer *cp;
 	u_int i, threads;
 	int error;
 
 	G_ELI_DEBUG(1, "Creating device %s%s.", bpp->name, G_ELI_SUFFIX);
 
 	gp = g_new_geomf(mp, "%s%s", bpp->name, G_ELI_SUFFIX);
 	sc = malloc(sizeof(*sc), M_ELI, M_WAITOK | M_ZERO);
 	gp->start = g_eli_start;
 	/*
 	 * Spoiling can happen even though we have the provider open
 	 * exclusively, e.g. through media change events.
 	 */
 	gp->spoiled = g_eli_orphan;
 	gp->orphan = g_eli_orphan;
 	gp->dumpconf = g_eli_dumpconf;
 	/*
 	 * If detach-on-last-close feature is not enabled and we don't operate
 	 * on read-only provider, we can simply use g_std_access().
 	 */
 	if (md->md_flags & (G_ELI_FLAG_WO_DETACH | G_ELI_FLAG_RO))
 		gp->access = g_eli_access;
 	else
 		gp->access = g_std_access;
 
-	sc->sc_version = md->md_version;
-	sc->sc_inflight = 0;
-	sc->sc_crypto = G_ELI_CRYPTO_UNKNOWN;
-	sc->sc_flags = md->md_flags;
-	/* Backward compatibility. */
-	if (md->md_version < G_ELI_VERSION_04)
-		sc->sc_flags |= G_ELI_FLAG_NATIVE_BYTE_ORDER;
-	if (md->md_version < G_ELI_VERSION_05)
-		sc->sc_flags |= G_ELI_FLAG_SINGLE_KEY;
-	if (md->md_version < G_ELI_VERSION_06 &&
-	    (sc->sc_flags & G_ELI_FLAG_AUTH) != 0) {
-		sc->sc_flags |= G_ELI_FLAG_FIRST_KEY;
-	}
-	if (md->md_version < G_ELI_VERSION_07)
-		sc->sc_flags |= G_ELI_FLAG_ENC_IVKEY;
-	sc->sc_ealgo = md->md_ealgo;
+	eli_metadata_softc(sc, md, bpp->sectorsize, bpp->mediasize);
 	sc->sc_nkey = nkey;
 
-	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
-		sc->sc_akeylen = sizeof(sc->sc_akey) * 8;
-		sc->sc_aalgo = md->md_aalgo;
-		sc->sc_alen = g_eli_hashlen(sc->sc_aalgo);
-
-		sc->sc_data_per_sector = bpp->sectorsize - sc->sc_alen;
-		/*
-		 * Some hash functions (like SHA1 and RIPEMD160) generates hash
-		 * which length is not multiple of 128 bits, but we want data
-		 * length to be multiple of 128, so we can encrypt without
-		 * padding. The line below rounds down data length to multiple
-		 * of 128 bits.
-		 */
-		sc->sc_data_per_sector -= sc->sc_data_per_sector % 16;
-
-		sc->sc_bytes_per_sector =
-		    (md->md_sectorsize - 1) / sc->sc_data_per_sector + 1;
-		sc->sc_bytes_per_sector *= bpp->sectorsize;
-	}
-
 	gp->softc = sc;
 	sc->sc_geom = gp;
 
 	bioq_init(&sc->sc_queue);
 	mtx_init(&sc->sc_queue_mtx, "geli:queue", NULL, MTX_DEF);
 	mtx_init(&sc->sc_ekeys_lock, "geli:ekeys", NULL, MTX_DEF);
 
 	pp = NULL;
 	cp = g_new_consumer(gp);
 	error = g_attach(cp, bpp);
 	if (error != 0) {
 		if (req != NULL) {
 			gctl_error(req, "Cannot attach to %s (error=%d).",
 			    bpp->name, error);
 		} else {
 			G_ELI_DEBUG(1, "Cannot attach to %s (error=%d).",
 			    bpp->name, error);
 		}
 		goto failed;
 	}
 	/*
 	 * Keep provider open all the time, so we can run critical tasks,
 	 * like Master Keys deletion, without wondering if we can open
 	 * provider or not.
 	 * We don't open provider for writing only when user requested read-only
 	 * access.
 	 */
 	if (sc->sc_flags & G_ELI_FLAG_RO)
 		error = g_access(cp, 1, 0, 1);
 	else
 		error = g_access(cp, 1, 1, 1);
 	if (error != 0) {
 		if (req != NULL) {
 			gctl_error(req, "Cannot access %s (error=%d).",
 			    bpp->name, error);
 		} else {
 			G_ELI_DEBUG(1, "Cannot access %s (error=%d).",
 			    bpp->name, error);
 		}
 		goto failed;
 	}
 
-	sc->sc_sectorsize = md->md_sectorsize;
-	sc->sc_mediasize = bpp->mediasize;
-	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME))
-		sc->sc_mediasize -= bpp->sectorsize;
-	if (!(sc->sc_flags & G_ELI_FLAG_AUTH))
-		sc->sc_mediasize -= (sc->sc_mediasize % sc->sc_sectorsize);
-	else {
-		sc->sc_mediasize /= sc->sc_bytes_per_sector;
-		sc->sc_mediasize *= sc->sc_sectorsize;
-	}
-
 	/*
 	 * Remember the keys in our softc structure.
 	 */
 	g_eli_mkey_propagate(sc, mkey);
-	sc->sc_ekeylen = md->md_keylen;
 
 	LIST_INIT(&sc->sc_workers);
 
 	/* Bind workers to CPUs only when there is exactly one per CPU. */
 	threads = g_eli_threads;
 	if (threads == 0)
 		threads = mp_ncpus;
 	sc->sc_cpubind = (mp_ncpus > 1 && threads == mp_ncpus);
 	for (i = 0; i < threads; i++) {
 		if (g_eli_cpu_is_disabled(i)) {
 			G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.",
 			    bpp->name, i);
 			continue;
 		}
 		wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO);
 		wr->w_softc = sc;
 		wr->w_number = i;
 		wr->w_active = TRUE;
 
 		error = g_eli_newsession(wr);
 		if (error != 0) {
 			free(wr, M_ELI);
 			if (req != NULL) {
 				gctl_error(req, "Cannot set up crypto session "
 				    "for %s (error=%d).", bpp->name, error);
 			} else {
 				G_ELI_DEBUG(1, "Cannot set up crypto session "
 				    "for %s (error=%d).", bpp->name, error);
 			}
 			goto failed;
 		}
 
 		error = kproc_create(g_eli_worker, wr, &wr->w_proc, 0, 0,
 		    "g_eli[%u] %s", i, bpp->name);
 		if (error != 0) {
 			g_eli_freesession(wr);
 			free(wr, M_ELI);
 			if (req != NULL) {
 				gctl_error(req, "Cannot create kernel thread "
 				    "for %s (error=%d).", bpp->name, error);
 			} else {
 				G_ELI_DEBUG(1, "Cannot create kernel thread "
 				    "for %s (error=%d).", bpp->name, error);
 			}
 			goto failed;
 		}
 		LIST_INSERT_HEAD(&sc->sc_workers, wr, w_next);
 	}
 
 	/*
 	 * Create decrypted provider.
 	 */
 	pp = g_new_providerf(gp, "%s%s", bpp->name, G_ELI_SUFFIX);
 	pp->mediasize = sc->sc_mediasize;
 	pp->sectorsize = sc->sc_sectorsize;
 
 	g_error_provider(pp, 0);
 
 	G_ELI_DEBUG(0, "Device %s created.", pp->name);
 	G_ELI_DEBUG(0, "Encryption: %s %u", g_eli_algo2str(sc->sc_ealgo),
 	    sc->sc_ekeylen);
 	if (sc->sc_flags & G_ELI_FLAG_AUTH)
 		G_ELI_DEBUG(0, " Integrity: %s", g_eli_algo2str(sc->sc_aalgo));
 	G_ELI_DEBUG(0, "    Crypto: %s",
 	    sc->sc_crypto == G_ELI_CRYPTO_SW ? "software" : "hardware");
 	return (gp);
 failed:
 	mtx_lock(&sc->sc_queue_mtx);
 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
 	wakeup(sc);
 	/*
 	 * Wait for kernel threads self destruction.
 	 */
 	while (!LIST_EMPTY(&sc->sc_workers)) {
 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
 		    "geli:destroy", 0);
 	}
 	mtx_destroy(&sc->sc_queue_mtx);
 	if (cp->provider != NULL) {
 		/*
 		 * Release exactly what was acquired above.  A read-only
 		 * device was opened with (1, 0, 1), so the write count must
 		 * not be decremented in that case.
 		 */
 		if (cp->acr == 1)
 			g_access(cp, -1, -cp->acw, -1);
 		g_detach(cp);
 	}
 	g_destroy_consumer(cp);
 	g_destroy_geom(gp);
 	g_eli_key_destroy(sc);
 	bzero(sc, sizeof(*sc));
 	free(sc, M_ELI);
 	return (NULL);
 }
 
 /*
  * Tear down an ELI device.
  *
  * Returns ENXIO when `sc' is NULL and EBUSY when the decrypted provider is
  * still open.  In the EBUSY case with `force' set, the provider is
  * additionally withered and the device is armed to detach on last close.
  * Otherwise the workers are asked to exit, the softc is wiped and freed,
  * the geom is withered and 0 is returned.
  */
 int
 g_eli_destroy(struct g_eli_softc *sc, boolean_t force)
 {
 	struct g_geom *gp;
 	struct g_provider *pp;
 
 	g_topology_assert();
 
 	if (sc == NULL)
 		return (ENXIO);
 
 	gp = sc->sc_geom;
 	pp = LIST_FIRST(&gp->provider);
 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
 		if (force) {
 			G_ELI_DEBUG(1, "Device %s is still open, so it "
 			    "cannot be definitely removed.", pp->name);
 			/*
 			 * Switch to g_eli_access() so that the last close
 			 * posts the detach event.
 			 */
 			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
 			gp->access = g_eli_access;
 			g_wither_provider(pp, ENXIO);
 			return (EBUSY);
 		} else {
 			G_ELI_DEBUG(1,
 			    "Device %s is still open (r%dw%de%d).", pp->name,
 			    pp->acr, pp->acw, pp->ace);
 			return (EBUSY);
 		}
 	}
 
 	/*
 	 * Ask the workers to exit and wait until each of them has removed
 	 * itself from sc_workers.
 	 */
 	mtx_lock(&sc->sc_queue_mtx);
 	sc->sc_flags |= G_ELI_FLAG_DESTROY;
 	wakeup(sc);
 	while (!LIST_EMPTY(&sc->sc_workers)) {
 		msleep(&sc->sc_workers, &sc->sc_queue_mtx, PRIBIO,
 		    "geli:destroy", 0);
 	}
 	mtx_destroy(&sc->sc_queue_mtx);
 	gp->softc = NULL;
 	/* Destroy the keys, then zero the softc before freeing it. */
 	g_eli_key_destroy(sc);
 	bzero(sc, sizeof(*sc));
 	free(sc, M_ELI);
 
 	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
 		G_ELI_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom_close(gp, ENXIO);
 
 	return (0);
 }
 
 /*
  * GEOM class destroy_geom method: a non-forced g_eli_destroy() of the
  * geom's softc.  `req' and `mp' are unused.
  */
 static int
 g_eli_destroy_geom(struct gctl_req *req __unused,
     struct g_class *mp __unused, struct g_geom *gp)
 {
 	return (g_eli_destroy(gp->softc, FALSE));
 }
 
 /*
  * Feed every preloaded key file of `provider' into the HMAC context.
  *
  * Key files are looked up by preload type "<provider>:geli_keyfile<N>" for
  * N = 0, 1, ...; when index 0 is missing, the plain type
  * "<provider>:geli_keyfile" is tried instead.  Returns the number of key
  * files loaded, or 0 as soon as a key file lacks data, size or name.
  */
 static int
 g_eli_keyfiles_load(struct hmac_ctx *ctx, const char *provider)
 {
 	u_char *mod, *data;
 	char *path, type[64];
 	size_t len;
 	int n;
 
 	n = 0;
 	for (;;) {
 		snprintf(type, sizeof(type), "%s:geli_keyfile%d", provider, n);
 		mod = preload_search_by_type(type);
 		if (mod == NULL && n == 0) {
 			/*
 			 * If there is only one keyfile, allow simpler name.
 			 */
 			snprintf(type, sizeof(type), "%s:geli_keyfile", provider);
 			mod = preload_search_by_type(type);
 		}
 		if (mod == NULL)
 			break;
 
 		data = preload_fetch_addr(mod);
 		if (data == NULL) {
 			G_ELI_DEBUG(0, "Cannot find key file data for %s.",
 			    type);
 			return (0);
 		}
 		len = preload_fetch_size(mod);
 		if (len == 0) {
 			G_ELI_DEBUG(0, "Cannot find key file size for %s.",
 			    type);
 			return (0);
 		}
 		path = preload_search_info(mod, MODINFO_NAME);
 		if (path == NULL) {
 			G_ELI_DEBUG(0, "Cannot find key file name for %s.",
 			    type);
 			return (0);
 		}
 		G_ELI_DEBUG(1, "Loaded keyfile %s for %s (type: %s).", path,
 		    provider, type);
 		g_eli_crypto_hmac_update(ctx, data, len);
 		n++;
 	}
 
 	/* Return number of loaded keyfiles. */
 	return (n);
 }
 
 /*
  * Zero the in-memory contents of every preloaded key file of `provider'.
  *
  * The lookup mirrors g_eli_keyfiles_load(): types
  * "<provider>:geli_keyfile<N>" for N = 0, 1, ..., with the plain
  * "<provider>:geli_keyfile" accepted in place of index 0.  Without that
  * fallback a single key file loaded under the simple name would never be
  * wiped.
  */
 static void
 g_eli_keyfiles_clear(const char *provider)
 {
 	u_char *keyfile, *data;
 	char name[64];
 	size_t size;
 	int i;
 
 	for (i = 0; ; i++) {
 		snprintf(name, sizeof(name), "%s:geli_keyfile%d", provider, i);
 		keyfile = preload_search_by_type(name);
 		if (keyfile == NULL && i == 0) {
 			/*
 			 * If there is only one keyfile, allow simpler name.
 			 */
 			snprintf(name, sizeof(name), "%s:geli_keyfile", provider);
 			keyfile = preload_search_by_type(name);
 		}
 		if (keyfile == NULL)
 			return;
 		data = preload_fetch_addr(keyfile);
 		size = preload_fetch_size(keyfile);
 		/* Skip entries without usable data instead of stopping. */
 		if (data != NULL && size != 0)
 			bzero(data, size);
 	}
 }
 
 /*
  * Tasting is only made on boot.
  * We detect providers which should be attached before root is mounted.
  *
  * Returns the newly created geom, or NULL when the provider is not an ELI
  * provider marked for boot-time attach, when no valid key could be
  * obtained, or when the device could not be created.
  */
 static struct g_geom *
 g_eli_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
 {
 	struct g_eli_metadata md;
 	struct g_geom *gp;
 	struct hmac_ctx ctx;
 	char passphrase[256];
 	u_char key[G_ELI_USERKEYLEN], mkey[G_ELI_DATAIVKEYLEN];
 	u_int i, nkey, nkeyfiles, tries;
 	int error;
 
 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
 	g_topology_assert();
 
 	/* Nothing to do once root is mounted or when tries is set to 0. */
 	if (root_mounted() || g_eli_tries == 0)
 		return (NULL);
 
 	G_ELI_DEBUG(3, "Tasting %s.", pp->name);
 
 	error = g_eli_read_metadata(mp, pp, &md);
 	if (error != 0)
 		return (NULL);
 	gp = NULL;
 
 	/* Sanity-check the metadata before asking for any key material. */
 	if (strcmp(md.md_magic, G_ELI_MAGIC) != 0)
 		return (NULL);
 	if (md.md_version > G_ELI_VERSION) {
 		printf("geom_eli.ko module is too old to handle %s.\n",
 		    pp->name);
 		return (NULL);
 	}
 	if (md.md_provsize != pp->mediasize)
 		return (NULL);
 	/* Should we attach it on boot? */
 	if (!(md.md_flags & G_ELI_FLAG_BOOT))
 		return (NULL);
 	if (md.md_keys == 0x00) {
 		G_ELI_DEBUG(0, "No valid keys on %s.", pp->name);
 		return (NULL);
 	}
 	if (md.md_iterations == -1) {
 		/* If there is no passphrase, we try only once. */
 		tries = 1;
 	} else {
 		/* Ask for the passphrase no more than g_eli_tries times. */
 		tries = g_eli_tries;
 	}
 
 	/*
 	 * Iteration 0 uses the cached passphrase (when a passphrase is
 	 * defined); iterations 1..tries prompt on the console.
 	 */
 	for (i = 0; i <= tries; i++) {
 		g_eli_crypto_hmac_init(&ctx, NULL, 0);
 
 		/*
 		 * Load all key files.
 		 */
 		nkeyfiles = g_eli_keyfiles_load(&ctx, pp->name);
 
 		if (nkeyfiles == 0 && md.md_iterations == -1) {
 			/*
 			 * No key files and no passphrase, something is
 			 * definitely wrong here.
 			 * geli(8) doesn't allow for such situation, so assume
 			 * that there was really no passphrase and in that case
 			 * key files are not properly defined in loader.conf.
 			 */
 			G_ELI_DEBUG(0,
 			    "Found no key files in loader.conf for %s.",
 			    pp->name);
 			return (NULL);
 		}
 
 		/* Ask for the passphrase if defined. */
 		if (md.md_iterations >= 0) {
 			/* Try first with cached passphrase. */
 			if (i == 0) {
 				/* Caching disabled: go straight to prompting. */
 				if (!g_eli_boot_passcache)
 					continue;
 				memcpy(passphrase, cached_passphrase,
 				    sizeof(passphrase));
 			} else {
 				printf("Enter passphrase for %s: ", pp->name);
 				cngets(passphrase, sizeof(passphrase),
 				    g_eli_visible_passphrase);
 				/* Remember it for the next provider tasted. */
 				memcpy(cached_passphrase, passphrase,
 				    sizeof(passphrase));
 			}
 		}
 
 		/*
 		 * Prepare Derived-Key from the user passphrase.
 		 */
 		if (md.md_iterations == 0) {
 			/* No PKCS#5v2: salt and passphrase go into the HMAC. */
 			g_eli_crypto_hmac_update(&ctx, md.md_salt,
 			    sizeof(md.md_salt));
 			g_eli_crypto_hmac_update(&ctx, passphrase,
 			    strlen(passphrase));
 			bzero(passphrase, sizeof(passphrase));
 		} else if (md.md_iterations > 0) {
 			u_char dkey[G_ELI_USERKEYLEN];
 
 			pkcs5v2_genkey(dkey, sizeof(dkey), md.md_salt,
 			    sizeof(md.md_salt), passphrase, md.md_iterations);
 			bzero(passphrase, sizeof(passphrase));
 			g_eli_crypto_hmac_update(&ctx, dkey, sizeof(dkey));
 			bzero(dkey, sizeof(dkey));
 		}
 
 		g_eli_crypto_hmac_final(&ctx, key, 0);
 
 		/*
 		 * Decrypt Master-Key.
 		 */
 		error = g_eli_mkey_decrypt(&md, key, mkey, &nkey);
 		bzero(key, sizeof(key));
 		/* -1 is handled as "wrong key", a positive value as a hard error. */
 		if (error == -1) {
 			if (i == tries) {
 				G_ELI_DEBUG(0,
 				    "Wrong key for %s. No tries left.",
 				    pp->name);
 				g_eli_keyfiles_clear(pp->name);
 				return (NULL);
 			}
 			/* Stay quiet when only the cached passphrase failed. */
 			if (i > 0) {
 				G_ELI_DEBUG(0,
 				    "Wrong key for %s. Tries left: %u.",
 				    pp->name, tries - i);
 			}
 			/* Try again. */
 			continue;
 		} else if (error > 0) {
 			G_ELI_DEBUG(0,
 			    "Cannot decrypt Master Key for %s (error=%d).",
 			    pp->name, error);
 			g_eli_keyfiles_clear(pp->name);
 			return (NULL);
 		}
 		g_eli_keyfiles_clear(pp->name);
 		G_ELI_DEBUG(1, "Using Master Key %u for %s.", nkey, pp->name);
 		break;
 	}
 
 	/*
 	 * We have correct key, let's attach provider.
 	 */
 	gp = g_eli_create(NULL, mp, pp, &md, mkey, nkey);
 	/* Wipe the key material from the stack in either case. */
 	bzero(mkey, sizeof(mkey));
 	bzero(&md, sizeof(md));
 	if (gp == NULL) {
 		G_ELI_DEBUG(0, "Cannot create device %s%s.", pp->name,
 		    G_ELI_SUFFIX);
 		return (NULL);
 	}
 	return (gp);
 }
 
 static void
 g_eli_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
     struct g_consumer *cp, struct g_provider *pp)
 {
 	struct g_eli_softc *sc;
 
 	g_topology_assert();
 	sc = gp->softc;
 	if (sc == NULL)
 		return;
 	if (pp != NULL || cp != NULL)
 		return;	/* Nothing here. */
 
 	sbuf_printf(sb, "%s<KeysTotal>%ju</KeysTotal>\n", indent,
 	    (uintmax_t)sc->sc_ekeys_total);
 	sbuf_printf(sb, "%s<KeysAllocated>%ju</KeysAllocated>\n", indent,
 	    (uintmax_t)sc->sc_ekeys_allocated);
 	sbuf_printf(sb, "%s<Flags>", indent);
 	if (sc->sc_flags == 0)
 		sbuf_printf(sb, "NONE");
 	else {
 		int first = 1;
 
 #define ADD_FLAG(flag, name)	do {					\
 	if (sc->sc_flags & (flag)) {					\
 		if (!first)						\
 			sbuf_printf(sb, ", ");				\
 		else							\
 			first = 0;					\
 		sbuf_printf(sb, name);					\
 	}								\
 } while (0)
 		ADD_FLAG(G_ELI_FLAG_SUSPEND, "SUSPEND");
 		ADD_FLAG(G_ELI_FLAG_SINGLE_KEY, "SINGLE-KEY");
 		ADD_FLAG(G_ELI_FLAG_NATIVE_BYTE_ORDER, "NATIVE-BYTE-ORDER");
 		ADD_FLAG(G_ELI_FLAG_ONETIME, "ONETIME");
 		ADD_FLAG(G_ELI_FLAG_BOOT, "BOOT");
 		ADD_FLAG(G_ELI_FLAG_WO_DETACH, "W-DETACH");
 		ADD_FLAG(G_ELI_FLAG_RW_DETACH, "RW-DETACH");
 		ADD_FLAG(G_ELI_FLAG_AUTH, "AUTH");
 		ADD_FLAG(G_ELI_FLAG_WOPEN, "W-OPEN");
 		ADD_FLAG(G_ELI_FLAG_DESTROY, "DESTROY");
 		ADD_FLAG(G_ELI_FLAG_RO, "READ-ONLY");
 		ADD_FLAG(G_ELI_FLAG_NODELETE, "NODELETE");
 #undef  ADD_FLAG
 	}
 	sbuf_printf(sb, "</Flags>\n");
 
 	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME)) {
 		sbuf_printf(sb, "%s<UsedKey>%u</UsedKey>\n", indent,
 		    sc->sc_nkey);
 	}
 	sbuf_printf(sb, "%s<Version>%u</Version>\n", indent, sc->sc_version);
 	sbuf_printf(sb, "%s<Crypto>", indent);
 	switch (sc->sc_crypto) {
 	case G_ELI_CRYPTO_HW:
 		sbuf_printf(sb, "hardware");
 		break;
 	case G_ELI_CRYPTO_SW:
 		sbuf_printf(sb, "software");
 		break;
 	default:
 		sbuf_printf(sb, "UNKNOWN");
 		break;
 	}
 	sbuf_printf(sb, "</Crypto>\n");
 	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
 		sbuf_printf(sb,
 		    "%s<AuthenticationAlgorithm>%s</AuthenticationAlgorithm>\n",
 		    indent, g_eli_algo2str(sc->sc_aalgo));
 	}
 	sbuf_printf(sb, "%s<KeyLength>%u</KeyLength>\n", indent,
 	    sc->sc_ekeylen);
 	sbuf_printf(sb, "%s<EncryptionAlgorithm>%s</EncryptionAlgorithm>\n",
 	    indent, g_eli_algo2str(sc->sc_ealgo));
 	sbuf_printf(sb, "%s<State>%s</State>\n", indent,
 	    (sc->sc_flags & G_ELI_FLAG_SUSPEND) ? "SUSPENDED" : "ACTIVE");
 }
 
 static void
 g_eli_shutdown_pre_sync(void *arg, int howto)
 {
 	struct g_class *mp;
 	struct g_geom *gp, *gp2;
 	struct g_provider *pp;
 	struct g_eli_softc *sc;
 	int error;
 
 	mp = arg;
 	DROP_GIANT();
 	g_topology_lock();
 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
 		sc = gp->softc;
 		if (sc == NULL)
 			continue;
 		pp = LIST_FIRST(&gp->provider);
 		KASSERT(pp != NULL, ("No provider? gp=%p (%s)", gp, gp->name));
 		if (pp->acr + pp->acw + pp->ace == 0)
 			error = g_eli_destroy(sc, TRUE);
 		else {
 			sc->sc_flags |= G_ELI_FLAG_RW_DETACH;
 			gp->access = g_eli_access;
 		}
 	}
 	g_topology_unlock();
 	PICKUP_GIANT();
 }
 
 static void
 g_eli_init(struct g_class *mp)
 {
 
 	g_eli_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
 	    g_eli_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
 	if (g_eli_pre_sync == NULL)
 		G_ELI_DEBUG(0, "Warning! Cannot register shutdown event.");
 }
 
 static void
 g_eli_fini(struct g_class *mp)
 {
 
 	if (g_eli_pre_sync != NULL)
 		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_eli_pre_sync);
 }
 
 DECLARE_GEOM_CLASS(g_eli_class, g_eli);
 MODULE_DEPEND(g_eli, crypto, 1, 1, 1);
Index: projects/release-pkg/sys/geom/eli/g_eli.h
===================================================================
--- projects/release-pkg/sys/geom/eli/g_eli.h	(revision 293335)
+++ projects/release-pkg/sys/geom/eli/g_eli.h	(revision 293336)
@@ -1,629 +1,707 @@
 /*-
  * Copyright (c) 2005-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef	_G_ELI_H_
 #define	_G_ELI_H_
 
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/malloc.h>
 #include <crypto/sha2/sha256.h>
 #include <crypto/sha2/sha512.h>
 #include <opencrypto/cryptodev.h>
 #ifdef _KERNEL
 #include <sys/bio.h>
 #include <sys/libkern.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
-#include <sys/queue.h>
-#include <sys/tree.h>
 #include <geom/geom.h>
 #else
 #include <assert.h>
 #include <stdio.h>
 #include <string.h>
 #include <strings.h>
 #endif
+#include <sys/queue.h>
+#include <sys/tree.h>
 #ifndef _OpenSSL_
 #include <sys/md5.h>
 #endif
 
 #define	G_ELI_CLASS_NAME	"ELI"
 #define	G_ELI_MAGIC		"GEOM::ELI"
 #define	G_ELI_SUFFIX		".eli"
 
 /*
  * Version history:
  * 0 - Initial version number.
  * 1 - Added data authentication support (md_aalgo field and
  *     G_ELI_FLAG_AUTH flag).
  * 2 - Added G_ELI_FLAG_READONLY.
  * 3 - Added 'configure' subcommand.
  * 4 - IV is generated from offset converted to little-endian
  *     (the G_ELI_FLAG_NATIVE_BYTE_ORDER flag will be set for older versions).
  * 5 - Added multiple encrypton keys and AES-XTS support.
  * 6 - Fixed usage of multiple keys for authenticated providers (the
  *     G_ELI_FLAG_FIRST_KEY flag will be set for older versions).
  * 7 - Encryption keys are now generated from the Data Key and not from the
  *     IV Key (the G_ELI_FLAG_ENC_IVKEY flag will be set for older versions).
  */
 #define	G_ELI_VERSION_00	0
 #define	G_ELI_VERSION_01	1
 #define	G_ELI_VERSION_02	2
 #define	G_ELI_VERSION_03	3
 #define	G_ELI_VERSION_04	4
 #define	G_ELI_VERSION_05	5
 #define	G_ELI_VERSION_06	6
 #define	G_ELI_VERSION_07	7
 #define	G_ELI_VERSION		G_ELI_VERSION_07
 
 /* ON DISK FLAGS. */
 /* Use random, onetime keys. */
 #define	G_ELI_FLAG_ONETIME		0x00000001
 /* Ask for the passphrase from the kernel, before mounting root. */
 #define	G_ELI_FLAG_BOOT			0x00000002
 /* Detach on last close, if we were open for writing. */
 #define	G_ELI_FLAG_WO_DETACH		0x00000004
 /* Detach on last close. */
 #define	G_ELI_FLAG_RW_DETACH		0x00000008
 /* Provide data authentication. */
 #define	G_ELI_FLAG_AUTH			0x00000010
 /* Provider is read-only, we should deny all write attempts. */
 #define	G_ELI_FLAG_RO			0x00000020
 /* Don't pass through BIO_DELETE requests. */
 #define	G_ELI_FLAG_NODELETE		0x00000040
 /* RUNTIME FLAGS. */
 /* Provider was open for writing. */
 #define	G_ELI_FLAG_WOPEN		0x00010000
 /* Destroy device. */
 #define	G_ELI_FLAG_DESTROY		0x00020000
 /* Provider uses native byte-order for IV generation. */
 #define	G_ELI_FLAG_NATIVE_BYTE_ORDER	0x00040000
 /* Provider uses single encryption key. */
 #define	G_ELI_FLAG_SINGLE_KEY		0x00080000
 /* Device suspended. */
 #define	G_ELI_FLAG_SUSPEND		0x00100000
 /* Provider uses first encryption key. */
 #define	G_ELI_FLAG_FIRST_KEY		0x00200000
 /* Provider uses IV-Key for encryption key generation. */
 #define	G_ELI_FLAG_ENC_IVKEY		0x00400000
 
 #define	G_ELI_NEW_BIO	255
 
 #define	SHA512_MDLEN		64
 #define	G_ELI_AUTH_SECKEYLEN	SHA256_DIGEST_LENGTH
 
 #define	G_ELI_MAXMKEYS		2
 #define	G_ELI_MAXKEYLEN		64
 #define	G_ELI_USERKEYLEN	G_ELI_MAXKEYLEN
 #define	G_ELI_DATAKEYLEN	G_ELI_MAXKEYLEN
 #define	G_ELI_AUTHKEYLEN	G_ELI_MAXKEYLEN
 #define	G_ELI_IVKEYLEN		G_ELI_MAXKEYLEN
 #define	G_ELI_SALTLEN		64
 #define	G_ELI_DATAIVKEYLEN	(G_ELI_DATAKEYLEN + G_ELI_IVKEYLEN)
 /* Data-Key, IV-Key, HMAC_SHA512(Derived-Key, Data-Key+IV-Key) */
 #define	G_ELI_MKEYLEN		(G_ELI_DATAIVKEYLEN + SHA512_MDLEN)
 #define	G_ELI_OVERWRITES	5
 /* Switch data encryption key every 2^20 blocks. */
 #define	G_ELI_KEY_SHIFT		20
 
+#define	G_ELI_CRYPTO_UNKNOWN	0
+#define	G_ELI_CRYPTO_HW		1
+#define	G_ELI_CRYPTO_SW		2
+
 #ifdef _KERNEL
 extern int g_eli_debug;
 extern u_int g_eli_overwrites;
 extern u_int g_eli_batch;
 
-#define	G_ELI_CRYPTO_UNKNOWN	0
-#define	G_ELI_CRYPTO_HW		1
-#define	G_ELI_CRYPTO_SW		2
-
 #define	G_ELI_DEBUG(lvl, ...)	do {					\
 	if (g_eli_debug >= (lvl)) {					\
 		printf("GEOM_ELI");					\
 		if (g_eli_debug > 0)					\
 			printf("[%u]", lvl);				\
 		printf(": ");						\
 		printf(__VA_ARGS__);					\
 		printf("\n");						\
 	}								\
 } while (0)
 #define	G_ELI_LOGREQ(lvl, bp, ...)	do {				\
 	if (g_eli_debug >= (lvl)) {					\
 		printf("GEOM_ELI");					\
 		if (g_eli_debug > 0)					\
 			printf("[%u]", lvl);				\
 		printf(": ");						\
 		printf(__VA_ARGS__);					\
 		printf(" ");						\
 		g_print_bio(bp);					\
 		printf("\n");						\
 	}								\
 } while (0)
 
 struct g_eli_worker {
 	struct g_eli_softc	*w_softc;
 	struct proc		*w_proc;
 	u_int			 w_number;
 	uint64_t		 w_sid;
 	boolean_t		 w_active;
 	LIST_ENTRY(g_eli_worker) w_next;
 };
 
+#endif	/* _KERNEL */
+
 struct g_eli_softc {
 	struct g_geom	*sc_geom;
 	u_int		 sc_version;
 	u_int		 sc_crypto;
 	uint8_t		 sc_mkey[G_ELI_DATAIVKEYLEN];
 	uint8_t		 sc_ekey[G_ELI_DATAKEYLEN];
 	TAILQ_HEAD(, g_eli_key) sc_ekeys_queue;
 	RB_HEAD(g_eli_key_tree, g_eli_key) sc_ekeys_tree;
 	struct mtx	 sc_ekeys_lock;
 	uint64_t	 sc_ekeys_total;
 	uint64_t	 sc_ekeys_allocated;
 	u_int		 sc_ealgo;
 	u_int		 sc_ekeylen;
 	uint8_t		 sc_akey[G_ELI_AUTHKEYLEN];
 	u_int		 sc_aalgo;
 	u_int		 sc_akeylen;
 	u_int		 sc_alen;
 	SHA256_CTX	 sc_akeyctx;
 	uint8_t		 sc_ivkey[G_ELI_IVKEYLEN];
 	SHA256_CTX	 sc_ivctx;
 	int		 sc_nkey;
 	uint32_t	 sc_flags;
 	int		 sc_inflight;
 	off_t		 sc_mediasize;
 	size_t		 sc_sectorsize;
 	u_int		 sc_bytes_per_sector;
 	u_int		 sc_data_per_sector;
+#ifndef _KERNEL
+	int		 sc_cpubind;
+#else /* _KERNEL */
 	boolean_t	 sc_cpubind;
 
 	/* Only for software cryptography. */
 	struct bio_queue_head sc_queue;
 	struct mtx	 sc_queue_mtx;
 	LIST_HEAD(, g_eli_worker) sc_workers;
+#endif /* _KERNEL */
 };
 #define	sc_name		 sc_geom->name
-#endif	/* _KERNEL */
 
+#define	G_ELI_KEY_MAGIC	0xe11341c
+
+struct g_eli_key {
+	/* Raw key bytes; must be the first member of the structure. */
+	uint8_t		gek_key[G_ELI_DATAKEYLEN];
+	/* Magic; NOTE(review): presumably G_ELI_KEY_MAGIC - confirm. */
+	int		gek_magic;
+	/* Number of this key. */
+	uint64_t	gek_keyno;
+	/* Reference counter. */
+	int		gek_count;
+	/* Tail queue linkage; keeps keys sorted by most recent use. */
+	TAILQ_ENTRY(g_eli_key) gek_next;
+	/* Red-black tree linkage; keeps keys sorted by number. */
+	RB_ENTRY(g_eli_key) gek_link;
+};
+
 struct g_eli_metadata {
 	char		md_magic[16];	/* Magic value. */
 	uint32_t	md_version;	/* Version number. */
 	uint32_t	md_flags;	/* Additional flags. */
 	uint16_t	md_ealgo;	/* Encryption algorithm. */
 	uint16_t	md_keylen;	/* Key length. */
 	uint16_t	md_aalgo;	/* Authentication algorithm. */
 	uint64_t	md_provsize;	/* Provider's size. */
 	uint32_t	md_sectorsize;	/* Sector size. */
 	uint8_t		md_keys;	/* Available keys. */
 	int32_t		md_iterations;	/* Number of iterations for PKCS#5v2. */
 	uint8_t		md_salt[G_ELI_SALTLEN]; /* Salt. */
 			/* Encrypted master key (IV-key, Data-key, HMAC). */
 	uint8_t		md_mkeys[G_ELI_MAXMKEYS * G_ELI_MKEYLEN];
 	u_char		md_hash[16];	/* MD5 hash. */
 } __packed;
 #ifndef _OpenSSL_
 static __inline void
 eli_metadata_encode_v0(struct g_eli_metadata *md, u_char **datap)
 {
 	u_char *p;
 
 	p = *datap;
 	le32enc(p, md->md_flags);	p += sizeof(md->md_flags);
 	le16enc(p, md->md_ealgo);	p += sizeof(md->md_ealgo);
 	le16enc(p, md->md_keylen);	p += sizeof(md->md_keylen);
 	le64enc(p, md->md_provsize);	p += sizeof(md->md_provsize);
 	le32enc(p, md->md_sectorsize);	p += sizeof(md->md_sectorsize);
 	*p = md->md_keys;		p += sizeof(md->md_keys);
 	le32enc(p, md->md_iterations);	p += sizeof(md->md_iterations);
 	bcopy(md->md_salt, p, sizeof(md->md_salt)); p += sizeof(md->md_salt);
 	bcopy(md->md_mkeys, p, sizeof(md->md_mkeys)); p += sizeof(md->md_mkeys);
 	*datap = p;
 }
 static __inline void
 eli_metadata_encode_v1v2v3v4v5v6v7(struct g_eli_metadata *md, u_char **datap)
 {
 	u_char *p;
 
 	p = *datap;
 	le32enc(p, md->md_flags);	p += sizeof(md->md_flags);
 	le16enc(p, md->md_ealgo);	p += sizeof(md->md_ealgo);
 	le16enc(p, md->md_keylen);	p += sizeof(md->md_keylen);
 	le16enc(p, md->md_aalgo);	p += sizeof(md->md_aalgo);
 	le64enc(p, md->md_provsize);	p += sizeof(md->md_provsize);
 	le32enc(p, md->md_sectorsize);	p += sizeof(md->md_sectorsize);
 	*p = md->md_keys;		p += sizeof(md->md_keys);
 	le32enc(p, md->md_iterations);	p += sizeof(md->md_iterations);
 	bcopy(md->md_salt, p, sizeof(md->md_salt)); p += sizeof(md->md_salt);
 	bcopy(md->md_mkeys, p, sizeof(md->md_mkeys)); p += sizeof(md->md_mkeys);
 	*datap = p;
 }
 static __inline void
 eli_metadata_encode(struct g_eli_metadata *md, u_char *data)
 {
 	MD5_CTX ctx;
 	u_char *p;
 
 	p = data;
 	bcopy(md->md_magic, p, sizeof(md->md_magic));
 	p += sizeof(md->md_magic);
 	le32enc(p, md->md_version);
 	p += sizeof(md->md_version);
 	switch (md->md_version) {
 	case G_ELI_VERSION_00:
 		eli_metadata_encode_v0(md, &p);
 		break;
 	case G_ELI_VERSION_01:
 	case G_ELI_VERSION_02:
 	case G_ELI_VERSION_03:
 	case G_ELI_VERSION_04:
 	case G_ELI_VERSION_05:
 	case G_ELI_VERSION_06:
 	case G_ELI_VERSION_07:
 		eli_metadata_encode_v1v2v3v4v5v6v7(md, &p);
 		break;
 	default:
 #ifdef _KERNEL
 		panic("%s: Unsupported version %u.", __func__,
 		    (u_int)md->md_version);
 #else
 		assert(!"Unsupported metadata version.");
 #endif
 	}
 	MD5Init(&ctx);
 	MD5Update(&ctx, data, p - data);
 	MD5Final(md->md_hash, &ctx);
 	bcopy(md->md_hash, p, sizeof(md->md_hash));
 }
 static __inline int
 eli_metadata_decode_v0(const u_char *data, struct g_eli_metadata *md)
 {
 	MD5_CTX ctx;
 	const u_char *p;
 
 	p = data + sizeof(md->md_magic) + sizeof(md->md_version);
 	md->md_flags = le32dec(p);	p += sizeof(md->md_flags);
 	md->md_ealgo = le16dec(p);	p += sizeof(md->md_ealgo);
 	md->md_keylen = le16dec(p);	p += sizeof(md->md_keylen);
 	md->md_provsize = le64dec(p);	p += sizeof(md->md_provsize);
 	md->md_sectorsize = le32dec(p);	p += sizeof(md->md_sectorsize);
 	md->md_keys = *p;		p += sizeof(md->md_keys);
 	md->md_iterations = le32dec(p);	p += sizeof(md->md_iterations);
 	bcopy(p, md->md_salt, sizeof(md->md_salt)); p += sizeof(md->md_salt);
 	bcopy(p, md->md_mkeys, sizeof(md->md_mkeys)); p += sizeof(md->md_mkeys);
 	MD5Init(&ctx);
 	MD5Update(&ctx, data, p - data);
 	MD5Final(md->md_hash, &ctx);
 	if (bcmp(md->md_hash, p, 16) != 0)
 		return (EINVAL);
 	return (0);
 }
 
 static __inline int
 eli_metadata_decode_v1v2v3v4v5v6v7(const u_char *data, struct g_eli_metadata *md)
 {
 	MD5_CTX ctx;
 	const u_char *p;
 
 	p = data + sizeof(md->md_magic) + sizeof(md->md_version);
 	md->md_flags = le32dec(p);	p += sizeof(md->md_flags);
 	md->md_ealgo = le16dec(p);	p += sizeof(md->md_ealgo);
 	md->md_keylen = le16dec(p);	p += sizeof(md->md_keylen);
 	md->md_aalgo = le16dec(p);	p += sizeof(md->md_aalgo);
 	md->md_provsize = le64dec(p);	p += sizeof(md->md_provsize);
 	md->md_sectorsize = le32dec(p);	p += sizeof(md->md_sectorsize);
 	md->md_keys = *p;		p += sizeof(md->md_keys);
 	md->md_iterations = le32dec(p);	p += sizeof(md->md_iterations);
 	bcopy(p, md->md_salt, sizeof(md->md_salt)); p += sizeof(md->md_salt);
 	bcopy(p, md->md_mkeys, sizeof(md->md_mkeys)); p += sizeof(md->md_mkeys);
 	MD5Init(&ctx);
 	MD5Update(&ctx, data, p - data);
 	MD5Final(md->md_hash, &ctx);
 	if (bcmp(md->md_hash, p, 16) != 0)
 		return (EINVAL);
 	return (0);
 }
 static __inline int
 eli_metadata_decode(const u_char *data, struct g_eli_metadata *md)
 {
 	int error;
 
 	bcopy(data, md->md_magic, sizeof(md->md_magic));
 	if (strcmp(md->md_magic, G_ELI_MAGIC) != 0)
 		return (EINVAL);
 	md->md_version = le32dec(data + sizeof(md->md_magic));
 	switch (md->md_version) {
 	case G_ELI_VERSION_00:
 		error = eli_metadata_decode_v0(data, md);
 		break;
 	case G_ELI_VERSION_01:
 	case G_ELI_VERSION_02:
 	case G_ELI_VERSION_03:
 	case G_ELI_VERSION_04:
 	case G_ELI_VERSION_05:
 	case G_ELI_VERSION_06:
 	case G_ELI_VERSION_07:
 		error = eli_metadata_decode_v1v2v3v4v5v6v7(data, md);
 		break;
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 	return (error);
 }
 #endif	/* !_OpenSSL */
 
 static __inline u_int
 g_eli_str2ealgo(const char *name)
 {
 
 	if (strcasecmp("null", name) == 0)
 		return (CRYPTO_NULL_CBC);
 	else if (strcasecmp("null-cbc", name) == 0)
 		return (CRYPTO_NULL_CBC);
 	else if (strcasecmp("aes", name) == 0)
 		return (CRYPTO_AES_XTS);
 	else if (strcasecmp("aes-cbc", name) == 0)
 		return (CRYPTO_AES_CBC);
 	else if (strcasecmp("aes-xts", name) == 0)
 		return (CRYPTO_AES_XTS);
 	else if (strcasecmp("blowfish", name) == 0)
 		return (CRYPTO_BLF_CBC);
 	else if (strcasecmp("blowfish-cbc", name) == 0)
 		return (CRYPTO_BLF_CBC);
 	else if (strcasecmp("camellia", name) == 0)
 		return (CRYPTO_CAMELLIA_CBC);
 	else if (strcasecmp("camellia-cbc", name) == 0)
 		return (CRYPTO_CAMELLIA_CBC);
 	else if (strcasecmp("3des", name) == 0)
 		return (CRYPTO_3DES_CBC);
 	else if (strcasecmp("3des-cbc", name) == 0)
 		return (CRYPTO_3DES_CBC);
 	return (CRYPTO_ALGORITHM_MIN - 1);
 }
 
 static __inline u_int
 g_eli_str2aalgo(const char *name)
 {
 
 	if (strcasecmp("hmac/md5", name) == 0)
 		return (CRYPTO_MD5_HMAC);
 	else if (strcasecmp("hmac/sha1", name) == 0)
 		return (CRYPTO_SHA1_HMAC);
 	else if (strcasecmp("hmac/ripemd160", name) == 0)
 		return (CRYPTO_RIPEMD160_HMAC);
 	else if (strcasecmp("hmac/sha256", name) == 0)
 		return (CRYPTO_SHA2_256_HMAC);
 	else if (strcasecmp("hmac/sha384", name) == 0)
 		return (CRYPTO_SHA2_384_HMAC);
 	else if (strcasecmp("hmac/sha512", name) == 0)
 		return (CRYPTO_SHA2_512_HMAC);
 	return (CRYPTO_ALGORITHM_MIN - 1);
 }
 
 static __inline const char *
 g_eli_algo2str(u_int algo)
 {
 
 	switch (algo) {
 	case CRYPTO_NULL_CBC:
 		return ("NULL");
 	case CRYPTO_AES_CBC:
 		return ("AES-CBC");
 	case CRYPTO_AES_XTS:
 		return ("AES-XTS");
 	case CRYPTO_BLF_CBC:
 		return ("Blowfish-CBC");
 	case CRYPTO_CAMELLIA_CBC:
 		return ("CAMELLIA-CBC");
 	case CRYPTO_3DES_CBC:
 		return ("3DES-CBC");
 	case CRYPTO_MD5_HMAC:
 		return ("HMAC/MD5");
 	case CRYPTO_SHA1_HMAC:
 		return ("HMAC/SHA1");
 	case CRYPTO_RIPEMD160_HMAC:
 		return ("HMAC/RIPEMD160");
 	case CRYPTO_SHA2_256_HMAC:
 		return ("HMAC/SHA256");
 	case CRYPTO_SHA2_384_HMAC:
 		return ("HMAC/SHA384");
 	case CRYPTO_SHA2_512_HMAC:
 		return ("HMAC/SHA512");
 	}
 	return ("unknown");
 }
 
 static __inline void
 eli_metadata_dump(const struct g_eli_metadata *md)
 {
 	static const char hex[] = "0123456789abcdef";
 	char str[sizeof(md->md_mkeys) * 2 + 1];
 	u_int i;
 
 	printf("     magic: %s\n", md->md_magic);
 	printf("   version: %u\n", (u_int)md->md_version);
 	printf("     flags: 0x%x\n", (u_int)md->md_flags);
 	printf("     ealgo: %s\n", g_eli_algo2str(md->md_ealgo));
 	printf("    keylen: %u\n", (u_int)md->md_keylen);
 	if (md->md_flags & G_ELI_FLAG_AUTH)
 		printf("     aalgo: %s\n", g_eli_algo2str(md->md_aalgo));
 	printf("  provsize: %ju\n", (uintmax_t)md->md_provsize);
 	printf("sectorsize: %u\n", (u_int)md->md_sectorsize);
 	printf("      keys: 0x%02x\n", (u_int)md->md_keys);
 	printf("iterations: %u\n", (u_int)md->md_iterations);
 	bzero(str, sizeof(str));
 	for (i = 0; i < sizeof(md->md_salt); i++) {
 		str[i * 2] = hex[md->md_salt[i] >> 4];
 		str[i * 2 + 1] = hex[md->md_salt[i] & 0x0f];
 	}
 	printf("      Salt: %s\n", str);
 	bzero(str, sizeof(str));
 	for (i = 0; i < sizeof(md->md_mkeys); i++) {
 		str[i * 2] = hex[md->md_mkeys[i] >> 4];
 		str[i * 2 + 1] = hex[md->md_mkeys[i] & 0x0f];
 	}
 	printf("Master Key: %s\n", str);
 	bzero(str, sizeof(str));
 	for (i = 0; i < 16; i++) {
 		str[i * 2] = hex[md->md_hash[i] >> 4];
 		str[i * 2 + 1] = hex[md->md_hash[i] & 0x0f];
 	}
 	printf("  MD5 hash: %s\n", str);
 }
 
 static __inline u_int
 g_eli_keylen(u_int algo, u_int keylen)
 {
 
 	switch (algo) {
 	case CRYPTO_NULL_CBC:
 		if (keylen == 0)
 			keylen = 64 * 8;
 		else {
 			if (keylen > 64 * 8)
 				keylen = 0;
 		}
 		return (keylen);
 	case CRYPTO_AES_CBC:
 	case CRYPTO_CAMELLIA_CBC:
 		switch (keylen) {
 		case 0:
 			return (128);
 		case 128:
 		case 192:
 		case 256:
 			return (keylen);
 		default:
 			return (0);
 		}
 	case CRYPTO_AES_XTS:
 		switch (keylen) {
 		case 0:
 			return (128);
 		case 128:
 		case 256:
 			return (keylen);
 		default:
 			return (0);
 		}
 	case CRYPTO_BLF_CBC:
 		if (keylen == 0)
 			return (128);
 		if (keylen < 128 || keylen > 448)
 			return (0);
 		if ((keylen % 32) != 0)
 			return (0);
 		return (keylen);
 	case CRYPTO_3DES_CBC:
 		if (keylen == 0 || keylen == 192)
 			return (192);
 		return (0);
 	default:
 		return (0);
 	}
 }
 
 static __inline u_int
 g_eli_hashlen(u_int algo)
 {
 
 	switch (algo) {
 	case CRYPTO_MD5_HMAC:
 		return (16);
 	case CRYPTO_SHA1_HMAC:
 		return (20);
 	case CRYPTO_RIPEMD160_HMAC:
 		return (20);
 	case CRYPTO_SHA2_256_HMAC:
 		return (32);
 	case CRYPTO_SHA2_384_HMAC:
 		return (48);
 	case CRYPTO_SHA2_512_HMAC:
 		return (64);
 	}
 	return (0);
 }
 
+static __inline void
+eli_metadata_softc(struct g_eli_softc *sc, const struct g_eli_metadata *md,
+    u_int sectorsize, off_t mediasize)
+{
+	/*
+	 * Initialize the fields of sc derived from md, sectorsize, mediasize.
+	 */
+	sc->sc_version = md->md_version;
+	sc->sc_inflight = 0;
+	sc->sc_crypto = G_ELI_CRYPTO_UNKNOWN;
+	sc->sc_flags = md->md_flags;
+	/* Backward compatibility: flags implied by older metadata versions. */
+	if (md->md_version < G_ELI_VERSION_04)
+		sc->sc_flags |= G_ELI_FLAG_NATIVE_BYTE_ORDER;
+	if (md->md_version < G_ELI_VERSION_05)
+		sc->sc_flags |= G_ELI_FLAG_SINGLE_KEY;
+	if (md->md_version < G_ELI_VERSION_06 &&
+	    (sc->sc_flags & G_ELI_FLAG_AUTH) != 0) {
+		sc->sc_flags |= G_ELI_FLAG_FIRST_KEY;
+	}
+	if (md->md_version < G_ELI_VERSION_07)
+		sc->sc_flags |= G_ELI_FLAG_ENC_IVKEY;
+	sc->sc_ealgo = md->md_ealgo;
+	/* Authentication-only fields; left untouched without G_ELI_FLAG_AUTH. */
+	if (sc->sc_flags & G_ELI_FLAG_AUTH) {
+		sc->sc_akeylen = sizeof(sc->sc_akey) * 8;
+		sc->sc_aalgo = md->md_aalgo;
+		sc->sc_alen = g_eli_hashlen(sc->sc_aalgo);
+
+		sc->sc_data_per_sector = sectorsize - sc->sc_alen;
+		/*
+		 * Some hashes (e.g. SHA1, RIPEMD160) have lengths that are not
+		 * multiples of 128 bits. Round the data length down to a
+		 * multiple of 16 bytes so it can be encrypted without padding.
+		 */
+		sc->sc_data_per_sector -= sc->sc_data_per_sector % 16;
+		/* ceil(md_sectorsize / sc_data_per_sector), scaled to bytes. */
+		sc->sc_bytes_per_sector =
+		    (md->md_sectorsize - 1) / sc->sc_data_per_sector + 1;
+		sc->sc_bytes_per_sector *= sectorsize;
+	}
+	sc->sc_sectorsize = md->md_sectorsize;
+	sc->sc_mediasize = mediasize;
+	if (!(sc->sc_flags & G_ELI_FLAG_ONETIME))
+		sc->sc_mediasize -= sectorsize;
+	if (!(sc->sc_flags & G_ELI_FLAG_AUTH))
+		sc->sc_mediasize -= (sc->sc_mediasize % sc->sc_sectorsize);
+	else {
+		sc->sc_mediasize /= sc->sc_bytes_per_sector;
+		sc->sc_mediasize *= sc->sc_sectorsize;
+	}
+	sc->sc_ekeylen = md->md_keylen;
+}
+
 #ifdef _KERNEL
 int g_eli_read_metadata(struct g_class *mp, struct g_provider *pp,
     struct g_eli_metadata *md);
 struct g_geom *g_eli_create(struct gctl_req *req, struct g_class *mp,
     struct g_provider *bpp, const struct g_eli_metadata *md,
     const u_char *mkey, int nkey);
 int g_eli_destroy(struct g_eli_softc *sc, boolean_t force);
 
 int g_eli_access(struct g_provider *pp, int dr, int dw, int de);
 void g_eli_config(struct gctl_req *req, struct g_class *mp, const char *verb);
 
 void g_eli_read_done(struct bio *bp);
 void g_eli_write_done(struct bio *bp);
 int g_eli_crypto_rerun(struct cryptop *crp);
-void g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv,
-    size_t size);
 
 void g_eli_crypto_read(struct g_eli_softc *sc, struct bio *bp, boolean_t fromworker);
 void g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp);
 
 void g_eli_auth_read(struct g_eli_softc *sc, struct bio *bp);
 void g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp);
 #endif
+void g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv,
+    size_t size);
 
 void g_eli_mkey_hmac(unsigned char *mkey, const unsigned char *key);
 int g_eli_mkey_decrypt(const struct g_eli_metadata *md,
     const unsigned char *key, unsigned char *mkey, unsigned *nkeyp);
 int g_eli_mkey_encrypt(unsigned algo, const unsigned char *key, unsigned keylen,
     unsigned char *mkey);
 #ifdef _KERNEL
 void g_eli_mkey_propagate(struct g_eli_softc *sc, const unsigned char *mkey);
 #endif
 
 int g_eli_crypto_encrypt(u_int algo, u_char *data, size_t datasize,
     const u_char *key, size_t keysize);
 int g_eli_crypto_decrypt(u_int algo, u_char *data, size_t datasize,
     const u_char *key, size_t keysize);
 
 struct hmac_ctx {
 	SHA512_CTX	shactx;
 	u_char		k_opad[128];
 };
 
 void g_eli_crypto_hmac_init(struct hmac_ctx *ctx, const uint8_t *hkey,
     size_t hkeylen);
 void g_eli_crypto_hmac_update(struct hmac_ctx *ctx, const uint8_t *data,
     size_t datasize);
 void g_eli_crypto_hmac_final(struct hmac_ctx *ctx, uint8_t *md, size_t mdsize);
 void g_eli_crypto_hmac(const uint8_t *hkey, size_t hkeysize,
     const uint8_t *data, size_t datasize, uint8_t *md, size_t mdsize);
 
+void g_eli_key_fill(struct g_eli_softc *sc, struct g_eli_key *key,
+    uint64_t keyno);
 #ifdef _KERNEL
 void g_eli_key_init(struct g_eli_softc *sc);
 void g_eli_key_destroy(struct g_eli_softc *sc);
 uint8_t *g_eli_key_hold(struct g_eli_softc *sc, off_t offset, size_t blocksize);
 void g_eli_key_drop(struct g_eli_softc *sc, uint8_t *rawkey);
 #endif
 #endif	/* !_G_ELI_H_ */
Index: projects/release-pkg/sys/geom/eli/g_eli_crypto.c
===================================================================
--- projects/release-pkg/sys/geom/eli/g_eli_crypto.c	(revision 293335)
+++ projects/release-pkg/sys/geom/eli/g_eli_crypto.c	(revision 293336)
@@ -1,295 +1,223 @@
 /*-
  * Copyright (c) 2005-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #ifdef _KERNEL
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #else
 #include <stdint.h>
 #include <string.h>
 #include <strings.h>
 #include <errno.h>
 #include <assert.h>
 #include <openssl/evp.h>
 #define	_OpenSSL_
 #endif
 #include <geom/eli/g_eli.h>
 
 #ifdef _KERNEL
 MALLOC_DECLARE(M_ELI);
 
 static int
 g_eli_crypto_done(struct cryptop *crp)
 {
 
 	crp->crp_opaque = (void *)crp;
 	wakeup(crp);
 	return (0);
 }
 
 static int
 g_eli_crypto_cipher(u_int algo, int enc, u_char *data, size_t datasize,
     const u_char *key, size_t keysize)
 {
 	struct cryptoini cri;
 	struct cryptop *crp;
 	struct cryptodesc *crd;
 	uint64_t sid;
 	u_char *p;
 	int error;
 
 	KASSERT(algo != CRYPTO_AES_XTS,
 	    ("%s: CRYPTO_AES_XTS unexpected here", __func__));
 
 	bzero(&cri, sizeof(cri));
 	cri.cri_alg = algo;
 	cri.cri_key = __DECONST(void *, key);
 	cri.cri_klen = keysize;
 	error = crypto_newsession(&sid, &cri, CRYPTOCAP_F_SOFTWARE);
 	if (error != 0)
 		return (error);
 	p = malloc(sizeof(*crp) + sizeof(*crd), M_ELI, M_NOWAIT | M_ZERO);
 	if (p == NULL) {
 		crypto_freesession(sid);
 		return (ENOMEM);
 	}
 	crp = (struct cryptop *)p;	p += sizeof(*crp);
 	crd = (struct cryptodesc *)p;	p += sizeof(*crd);
 
 	crd->crd_skip = 0;
 	crd->crd_len = datasize;
 	crd->crd_flags = CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT;
 	if (enc)
 		crd->crd_flags |= CRD_F_ENCRYPT;
 	crd->crd_alg = algo;
 	crd->crd_key = __DECONST(void *, key);
 	crd->crd_klen = keysize;
 	bzero(crd->crd_iv, sizeof(crd->crd_iv));
 	crd->crd_next = NULL;
 
 	crp->crp_sid = sid;
 	crp->crp_ilen = datasize;
 	crp->crp_olen = datasize;
 	crp->crp_opaque = NULL;
 	crp->crp_callback = g_eli_crypto_done;
 	crp->crp_buf = (void *)data;
 	crp->crp_flags = CRYPTO_F_CBIFSYNC;
 	crp->crp_desc = crd;
 
 	error = crypto_dispatch(crp);
 	if (error == 0) {
 		while (crp->crp_opaque == NULL)
 			tsleep(crp, PRIBIO, "geli", hz / 5);
 		error = crp->crp_etype;
 	}
 
 	free(crp, M_ELI);
 	crypto_freesession(sid);
 	return (error);
 }
 #else	/* !_KERNEL */
 static int
 g_eli_crypto_cipher(u_int algo, int enc, u_char *data, size_t datasize,
     const u_char *key, size_t keysize)
 {
 	EVP_CIPHER_CTX ctx;
 	const EVP_CIPHER *type;
 	u_char iv[keysize];
 	int outsize;
 
 	assert(algo != CRYPTO_AES_XTS);
 
 	switch (algo) {
 	case CRYPTO_NULL_CBC:
 		type = EVP_enc_null();
 		break;
 	case CRYPTO_AES_CBC:
 		switch (keysize) {
 		case 128:
 			type = EVP_aes_128_cbc();
 			break;
 		case 192:
 			type = EVP_aes_192_cbc();
 			break;
 		case 256:
 			type = EVP_aes_256_cbc();
 			break;
 		default:
 			return (EINVAL);
 		}
 		break;
 	case CRYPTO_BLF_CBC:
 		type = EVP_bf_cbc();
 		break;
 #ifndef OPENSSL_NO_CAMELLIA
 	case CRYPTO_CAMELLIA_CBC:
 		switch (keysize) {
 		case 128:
 			type = EVP_camellia_128_cbc();
 			break;
 		case 192:
 			type = EVP_camellia_192_cbc();
 			break;
 		case 256:
 			type = EVP_camellia_256_cbc();
 			break;
 		default:
 			return (EINVAL);
 		}
 		break;
 #endif
 	case CRYPTO_3DES_CBC:
 		type = EVP_des_ede3_cbc();
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	EVP_CIPHER_CTX_init(&ctx);
 
 	EVP_CipherInit_ex(&ctx, type, NULL, NULL, NULL, enc);
 	EVP_CIPHER_CTX_set_key_length(&ctx, keysize / 8);
 	EVP_CIPHER_CTX_set_padding(&ctx, 0);
 	bzero(iv, sizeof(iv));
 	EVP_CipherInit_ex(&ctx, NULL, NULL, key, iv, enc);
 
 	if (EVP_CipherUpdate(&ctx, data, &outsize, data, datasize) == 0) {
 		EVP_CIPHER_CTX_cleanup(&ctx);
 		return (EINVAL);
 	}
 	assert(outsize == (int)datasize);
 
 	if (EVP_CipherFinal_ex(&ctx, data + outsize, &outsize) == 0) {
 		EVP_CIPHER_CTX_cleanup(&ctx);
 		return (EINVAL);
 	}
 	assert(outsize == 0);
 
 	EVP_CIPHER_CTX_cleanup(&ctx);
 	return (0);
 }
 #endif	/* !_KERNEL */
 
 int
 g_eli_crypto_encrypt(u_int algo, u_char *data, size_t datasize,
     const u_char *key, size_t keysize)
 {
 
 	/* We prefer AES-CBC for metadata protection. */
 	if (algo == CRYPTO_AES_XTS)
 		algo = CRYPTO_AES_CBC;
 
 	return (g_eli_crypto_cipher(algo, 1, data, datasize, key, keysize));
 }
 
 int
 g_eli_crypto_decrypt(u_int algo, u_char *data, size_t datasize,
     const u_char *key, size_t keysize)
 {
 
 	/* We prefer AES-CBC for metadata protection. */
 	if (algo == CRYPTO_AES_XTS)
 		algo = CRYPTO_AES_CBC;
 
 	return (g_eli_crypto_cipher(algo, 0, data, datasize, key, keysize));
 }
-
-void
-g_eli_crypto_hmac_init(struct hmac_ctx *ctx, const uint8_t *hkey,
-    size_t hkeylen)
-{
-	u_char k_ipad[128], key[128];
-	SHA512_CTX lctx;
-	u_int i;
-
-	bzero(key, sizeof(key));
-	if (hkeylen == 0)
-		; /* do nothing */
-	else if (hkeylen <= 128)
-		bcopy(hkey, key, hkeylen);
-	else {
-		/* If key is longer than 128 bytes reset it to key = SHA512(key). */
-		SHA512_Init(&lctx);
-		SHA512_Update(&lctx, hkey, hkeylen);
-		SHA512_Final(key, &lctx);
-	}
-
-	/* XOR key with ipad and opad values. */
-	for (i = 0; i < sizeof(key); i++) {
-		k_ipad[i] = key[i] ^ 0x36;
-		ctx->k_opad[i] = key[i] ^ 0x5c;
-	}
-	bzero(key, sizeof(key));
-	/* Perform inner SHA512. */
-	SHA512_Init(&ctx->shactx);
-	SHA512_Update(&ctx->shactx, k_ipad, sizeof(k_ipad));
-	bzero(k_ipad, sizeof(k_ipad));
-}
-
-void
-g_eli_crypto_hmac_update(struct hmac_ctx *ctx, const uint8_t *data,
-    size_t datasize)
-{
-
-	SHA512_Update(&ctx->shactx, data, datasize);
-}
-
-void
-g_eli_crypto_hmac_final(struct hmac_ctx *ctx, uint8_t *md, size_t mdsize)
-{
-	u_char digest[SHA512_MDLEN];
-	SHA512_CTX lctx;
-
-	SHA512_Final(digest, &ctx->shactx);
-	/* Perform outer SHA512. */
-	SHA512_Init(&lctx);
-	SHA512_Update(&lctx, ctx->k_opad, sizeof(ctx->k_opad));
-	bzero(ctx, sizeof(*ctx));
-	SHA512_Update(&lctx, digest, sizeof(digest));
-	SHA512_Final(digest, &lctx);
-	bzero(&lctx, sizeof(lctx));
-	/* mdsize == 0 means "Give me the whole hash!" */
-	if (mdsize == 0)
-		mdsize = SHA512_MDLEN;
-	bcopy(digest, md, mdsize);
-	bzero(digest, sizeof(digest));
-}
-
-void
-g_eli_crypto_hmac(const uint8_t *hkey, size_t hkeysize, const uint8_t *data,
-    size_t datasize, uint8_t *md, size_t mdsize)
-{
-	struct hmac_ctx ctx;
-
-	g_eli_crypto_hmac_init(&ctx, hkey, hkeysize);
-	g_eli_crypto_hmac_update(&ctx, data, datasize);
-	g_eli_crypto_hmac_final(&ctx, md, mdsize);
-}
Index: projects/release-pkg/sys/geom/eli/g_eli_hmac.c
===================================================================
--- projects/release-pkg/sys/geom/eli/g_eli_hmac.c	(nonexistent)
+++ projects/release-pkg/sys/geom/eli/g_eli_hmac.c	(revision 293336)
@@ -0,0 +1,185 @@
+/*-
+ * Copyright (c) 2005-2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#ifdef _KERNEL
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#else
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <errno.h>
+#include <assert.h>
+#include <openssl/evp.h>
+#define	_OpenSSL_
+#endif
+#include <geom/eli/g_eli.h>
+
+/*
+ * Begin an HMAC-SHA512 computation keyed with hkey (hkeylen bytes).
+ * The key is zero-padded to the 128-byte block used here; a key longer
+ * than that is first replaced by its SHA512 digest.  On return ctx holds
+ * the inner hash state (already fed with key ^ ipad) and key ^ opad for
+ * g_eli_crypto_hmac_final().
+ */
+void
+g_eli_crypto_hmac_init(struct hmac_ctx *ctx, const uint8_t *hkey,
+    size_t hkeylen)
+{
+	u_char k_ipad[128], key[128];
+	SHA512_CTX lctx;
+	u_int i;
+
+	bzero(key, sizeof(key));
+	if (hkeylen == 0)
+		; /* do nothing */
+	else if (hkeylen <= sizeof(key))
+		bcopy(hkey, key, hkeylen);
+	else {
+		/* If key is longer than 128 bytes reset it to key = SHA512(key). */
+		SHA512_Init(&lctx);
+		SHA512_Update(&lctx, hkey, hkeylen);
+		SHA512_Final(key, &lctx);
+	}
+
+	/* XOR key with ipad and opad values. */
+	for (i = 0; i < sizeof(key); i++) {
+		k_ipad[i] = key[i] ^ 0x36;
+		ctx->k_opad[i] = key[i] ^ 0x5c;
+	}
+	bzero(key, sizeof(key));
+	/* Perform inner SHA512. */
+	SHA512_Init(&ctx->shactx);
+	SHA512_Update(&ctx->shactx, k_ipad, sizeof(k_ipad));
+	bzero(k_ipad, sizeof(k_ipad));
+}
+
+/*
+ * Feed datasize bytes of data into the HMAC computation started by
+ * g_eli_crypto_hmac_init().
+ */
+void
+g_eli_crypto_hmac_update(struct hmac_ctx *ctx, const uint8_t *data,
+    size_t datasize)
+{
+
+	SHA512_Update(&ctx->shactx, data, datasize);
+}
+
+/*
+ * Finish the HMAC computation and copy the first mdsize bytes of the
+ * result to md; mdsize == 0 requests the full SHA512_MDLEN bytes.  The
+ * copy reads from a SHA512_MDLEN-byte buffer, so mdsize must not exceed
+ * SHA512_MDLEN.  ctx is zeroed and has to be reinitialized before reuse.
+ */
+void
+g_eli_crypto_hmac_final(struct hmac_ctx *ctx, uint8_t *md, size_t mdsize)
+{
+	u_char digest[SHA512_MDLEN];
+	SHA512_CTX lctx;
+
+	SHA512_Final(digest, &ctx->shactx);
+	/* Perform outer SHA512. */
+	SHA512_Init(&lctx);
+	SHA512_Update(&lctx, ctx->k_opad, sizeof(ctx->k_opad));
+	bzero(ctx, sizeof(*ctx));
+	SHA512_Update(&lctx, digest, sizeof(digest));
+	SHA512_Final(digest, &lctx);
+	bzero(&lctx, sizeof(lctx));
+	/* mdsize == 0 means "Give me the whole hash!" */
+	if (mdsize == 0)
+		mdsize = SHA512_MDLEN;
+	bcopy(digest, md, mdsize);
+	bzero(digest, sizeof(digest));
+}
+
+/*
+ * One-shot HMAC-SHA512 of data under hkey: init, update and final in a
+ * single call.  mdsize is interpreted as in g_eli_crypto_hmac_final().
+ */
+void
+g_eli_crypto_hmac(const uint8_t *hkey, size_t hkeysize, const uint8_t *data,
+    size_t datasize, uint8_t *md, size_t mdsize)
+{
+	struct hmac_ctx ctx;
+
+	g_eli_crypto_hmac_init(&ctx, hkey, hkeysize);
+	g_eli_crypto_hmac_update(&ctx, data, datasize);
+	g_eli_crypto_hmac_final(&ctx, md, mdsize);
+}
+
+/*
+ * Generate the IV for the sector at the given offset and store it in iv,
+ * which is size bytes long.  The IV is unique for every sector.
+ *
+ * The offset is serialized into 8 bytes, either as laid out in memory
+ * (G_ELI_FLAG_NATIVE_BYTE_ORDER) or little-endian.  For AES-XTS those
+ * bytes are the IV, zero-padded to size.  For all other algorithms the IV
+ * is the leading bytes of SHA256 over the precalculated IV-Key context
+ * followed by the serialized offset.
+ */
+void
+g_eli_crypto_ivgen(struct g_eli_softc *sc, off_t offset, u_char *iv,
+    size_t size)
+{
+	uint8_t off[8];
+	size_t len;
+
+	if ((sc->sc_flags & G_ELI_FLAG_NATIVE_BYTE_ORDER) != 0)
+		bcopy(&offset, off, sizeof(off));
+	else
+		le64enc(off, (uint64_t)offset);
+
+	switch (sc->sc_ealgo) {
+	case CRYPTO_AES_XTS:
+		/*
+		 * Never write more than size bytes: without the clamp a
+		 * buffer shorter than off would be overrun by the copy and
+		 * "size - sizeof(off)" would wrap around.
+		 */
+		len = MIN(sizeof(off), size);
+		bcopy(off, iv, len);
+		bzero(iv + len, size - len);
+		break;
+	default:
+	    {
+		u_char hash[SHA256_DIGEST_LENGTH];
+		SHA256_CTX ctx;
+
+		/* Copy precalculated SHA256 context for IV-Key. */
+		bcopy(&sc->sc_ivctx, &ctx, sizeof(ctx));
+		SHA256_Update(&ctx, off, sizeof(off));
+		SHA256_Final(hash, &ctx);
+		bcopy(hash, iv, MIN(sizeof(hash), size));
+		break;
+	    }
+	}
+}

Property changes on: projects/release-pkg/sys/geom/eli/g_eli_hmac.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Index: projects/release-pkg/sys/geom/eli/g_eli_key_cache.c
===================================================================
--- projects/release-pkg/sys/geom/eli/g_eli_key_cache.c	(revision 293335)
+++ projects/release-pkg/sys/geom/eli/g_eli_key_cache.c	(revision 293336)
@@ -1,352 +1,342 @@
 /*-
  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#ifdef _KERNEL
 #include <sys/kernel.h>
 #include <sys/malloc.h>
-#include <sys/queue.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
+#endif /* _KERNEL */
+#include <sys/queue.h>
 #include <sys/tree.h>
 
 #include <geom/geom.h>
 
 #include <geom/eli/g_eli.h>
 
+#ifdef _KERNEL
 MALLOC_DECLARE(M_ELI);
 
 SYSCTL_DECL(_kern_geom_eli);
 /*
  * The default limit (8192 keys) will allow to cache all keys for 4TB
  * provider with 512 bytes sectors and will take around 1MB of memory.
  */
 static u_int g_eli_key_cache_limit = 8192;
 SYSCTL_UINT(_kern_geom_eli, OID_AUTO, key_cache_limit, CTLFLAG_RDTUN,
     &g_eli_key_cache_limit, 0, "Maximum number of encryption keys to cache");
 static uint64_t g_eli_key_cache_hits;
 SYSCTL_UQUAD(_kern_geom_eli, OID_AUTO, key_cache_hits, CTLFLAG_RW,
     &g_eli_key_cache_hits, 0, "Key cache hits");
 static uint64_t g_eli_key_cache_misses;
 SYSCTL_UQUAD(_kern_geom_eli, OID_AUTO, key_cache_misses, CTLFLAG_RW,
     &g_eli_key_cache_misses, 0, "Key cache misses");
 
-#define	G_ELI_KEY_MAGIC	0xe11341c
+#endif /* _KERNEL */
 
-struct g_eli_key {
-	/* Key value, must be first in the structure. */
-	uint8_t		gek_key[G_ELI_DATAKEYLEN];
-	/* Magic. */
-	int		gek_magic;
-	/* Key number. */
-	uint64_t	gek_keyno;
-	/* Reference counter. */
-	int		gek_count;
-	/* Keeps keys sorted by most recent use. */
-	TAILQ_ENTRY(g_eli_key) gek_next;
-	/* Keeps keys sorted by number. */
-	RB_ENTRY(g_eli_key) gek_link;
-};
-
 static int
 g_eli_key_cmp(const struct g_eli_key *a, const struct g_eli_key *b)
 {
 
 	if (a->gek_keyno > b->gek_keyno)
 		return (1);
 	else if (a->gek_keyno < b->gek_keyno)
 		return (-1);
 	return (0);
 }
 
-RB_PROTOTYPE(g_eli_key_tree, g_eli_key, gek_link, g_eli_key_cmp);
-RB_GENERATE(g_eli_key_tree, g_eli_key, gek_link, g_eli_key_cmp);
-
-static void
+void
 g_eli_key_fill(struct g_eli_softc *sc, struct g_eli_key *key, uint64_t keyno)
 {
 	const uint8_t *ekey;
 	struct {
 		char magic[4];
 		uint8_t keyno[8];
 	} __packed hmacdata;
 
 	if ((sc->sc_flags & G_ELI_FLAG_ENC_IVKEY) != 0)
 		ekey = sc->sc_mkey;
 	else
 		ekey = sc->sc_ekey;
 
 	bcopy("ekey", hmacdata.magic, 4);
 	le64enc(hmacdata.keyno, keyno);
 	g_eli_crypto_hmac(ekey, G_ELI_MAXKEYLEN, (uint8_t *)&hmacdata,
 	    sizeof(hmacdata), key->gek_key, 0);
 	key->gek_keyno = keyno;
 	key->gek_count = 0;
 	key->gek_magic = G_ELI_KEY_MAGIC;
 }
 
+#ifdef _KERNEL
+RB_PROTOTYPE(g_eli_key_tree, g_eli_key, gek_link, g_eli_key_cmp);
+RB_GENERATE(g_eli_key_tree, g_eli_key, gek_link, g_eli_key_cmp);
+
 static struct g_eli_key *
 g_eli_key_allocate(struct g_eli_softc *sc, uint64_t keyno)
 {
 	struct g_eli_key *key, *ekey, keysearch;
 
 	mtx_assert(&sc->sc_ekeys_lock, MA_OWNED);
 	mtx_unlock(&sc->sc_ekeys_lock);
 
 	key = malloc(sizeof(*key), M_ELI, M_WAITOK);
 	g_eli_key_fill(sc, key, keyno);
 
 	mtx_lock(&sc->sc_ekeys_lock);
 	/*
 	 * Recheck if the key wasn't added while we weren't holding the lock.
 	 */
 	keysearch.gek_keyno = keyno;
 	ekey = RB_FIND(g_eli_key_tree, &sc->sc_ekeys_tree, &keysearch);
 	if (ekey != NULL) {
 		bzero(key, sizeof(*key));
 		free(key, M_ELI);
 		key = ekey;
 		TAILQ_REMOVE(&sc->sc_ekeys_queue, key, gek_next);
 	} else {
 		RB_INSERT(g_eli_key_tree, &sc->sc_ekeys_tree, key);
 		sc->sc_ekeys_allocated++;
 	}
 	TAILQ_INSERT_TAIL(&sc->sc_ekeys_queue, key, gek_next);
 
 	return (key);
 }
 
 static struct g_eli_key *
 g_eli_key_find_last(struct g_eli_softc *sc)
 {
 	struct g_eli_key *key;
 
 	mtx_assert(&sc->sc_ekeys_lock, MA_OWNED);
 
 	TAILQ_FOREACH(key, &sc->sc_ekeys_queue, gek_next) {
 		if (key->gek_count == 0)
 			break;
 	}
 
 	return (key);
 }
 
 static void
 g_eli_key_replace(struct g_eli_softc *sc, struct g_eli_key *key, uint64_t keyno)
 {
 
 	mtx_assert(&sc->sc_ekeys_lock, MA_OWNED);
 	KASSERT(key->gek_magic == G_ELI_KEY_MAGIC, ("Invalid magic."));
 
 	RB_REMOVE(g_eli_key_tree, &sc->sc_ekeys_tree, key);
 	TAILQ_REMOVE(&sc->sc_ekeys_queue, key, gek_next);
 
 	KASSERT(key->gek_count == 0, ("gek_count=%d", key->gek_count));
 
 	g_eli_key_fill(sc, key, keyno);
 
 	RB_INSERT(g_eli_key_tree, &sc->sc_ekeys_tree, key);
 	TAILQ_INSERT_TAIL(&sc->sc_ekeys_queue, key, gek_next);
 }
 
 static void
 g_eli_key_remove(struct g_eli_softc *sc, struct g_eli_key *key)
 {
 
 	mtx_assert(&sc->sc_ekeys_lock, MA_OWNED);
 	KASSERT(key->gek_magic == G_ELI_KEY_MAGIC, ("Invalid magic."));
 	KASSERT(key->gek_count == 0, ("gek_count=%d", key->gek_count));
 
 	RB_REMOVE(g_eli_key_tree, &sc->sc_ekeys_tree, key);
 	TAILQ_REMOVE(&sc->sc_ekeys_queue, key, gek_next);
 	sc->sc_ekeys_allocated--;
 	bzero(key, sizeof(*key));
 	free(key, M_ELI);
 }
 
 void
 g_eli_key_init(struct g_eli_softc *sc)
 {
 	uint8_t *mkey;
 
 	mtx_lock(&sc->sc_ekeys_lock);
 
 	mkey = sc->sc_mkey + sizeof(sc->sc_ivkey);
 	if ((sc->sc_flags & G_ELI_FLAG_AUTH) == 0)
 		bcopy(mkey, sc->sc_ekey, G_ELI_DATAKEYLEN);
 	else {
 		/*
 		 * The encryption key is: ekey = HMAC_SHA512(Data-Key, 0x10)
 		 */
 		g_eli_crypto_hmac(mkey, G_ELI_MAXKEYLEN, "\x10", 1,
 		    sc->sc_ekey, 0);
 	}
 
 	if ((sc->sc_flags & G_ELI_FLAG_SINGLE_KEY) != 0) {
 		sc->sc_ekeys_total = 1;
 		sc->sc_ekeys_allocated = 0;
 	} else {
 		off_t mediasize;
 		size_t blocksize;
 
 		if ((sc->sc_flags & G_ELI_FLAG_AUTH) != 0) {
 			struct g_provider *pp;
 
 			pp = LIST_FIRST(&sc->sc_geom->consumer)->provider;
 			mediasize = pp->mediasize;
 			blocksize = pp->sectorsize;
 		} else {
 			mediasize = sc->sc_mediasize;
 			blocksize = sc->sc_sectorsize;
 		}
 		sc->sc_ekeys_total =
 		    ((mediasize - 1) >> G_ELI_KEY_SHIFT) / blocksize + 1;
 		sc->sc_ekeys_allocated = 0;
 		TAILQ_INIT(&sc->sc_ekeys_queue);
 		RB_INIT(&sc->sc_ekeys_tree);
 		if (sc->sc_ekeys_total <= g_eli_key_cache_limit) {
 			uint64_t keyno;
 
 			for (keyno = 0; keyno < sc->sc_ekeys_total; keyno++)
 				(void)g_eli_key_allocate(sc, keyno);
 			KASSERT(sc->sc_ekeys_total == sc->sc_ekeys_allocated,
 			    ("sc_ekeys_total=%ju != sc_ekeys_allocated=%ju",
 			    (uintmax_t)sc->sc_ekeys_total,
 			    (uintmax_t)sc->sc_ekeys_allocated));
 		}
 	}
 
 	mtx_unlock(&sc->sc_ekeys_lock);
 }
 
 void
 g_eli_key_destroy(struct g_eli_softc *sc)
 {
 
 	mtx_lock(&sc->sc_ekeys_lock);
 	if ((sc->sc_flags & G_ELI_FLAG_SINGLE_KEY) != 0) {
 		bzero(sc->sc_ekey, sizeof(sc->sc_ekey));
 	} else {
 		struct g_eli_key *key;
 
 		while ((key = TAILQ_FIRST(&sc->sc_ekeys_queue)) != NULL)
 			g_eli_key_remove(sc, key);
 		TAILQ_INIT(&sc->sc_ekeys_queue);
 		RB_INIT(&sc->sc_ekeys_tree);
 	}
 	mtx_unlock(&sc->sc_ekeys_lock);
 }
 
 /*
  * Select encryption key. If G_ELI_FLAG_SINGLE_KEY is present we only have one
  * key available for all the data. If the flag is not present select the key
  * based on data offset.
  */
 uint8_t *
 g_eli_key_hold(struct g_eli_softc *sc, off_t offset, size_t blocksize)
 {
 	struct g_eli_key *key, keysearch;
 	uint64_t keyno;
 
 	if ((sc->sc_flags & G_ELI_FLAG_SINGLE_KEY) != 0)
 		return (sc->sc_ekey);
 
 	/* We switch key every 2^G_ELI_KEY_SHIFT blocks. */
 	keyno = (offset >> G_ELI_KEY_SHIFT) / blocksize;
 
 	KASSERT(keyno < sc->sc_ekeys_total,
 	    ("%s: keyno=%ju >= sc_ekeys_total=%ju",
 	    __func__, (uintmax_t)keyno, (uintmax_t)sc->sc_ekeys_total));
 
 	keysearch.gek_keyno = keyno;
 
 	if (sc->sc_ekeys_total == sc->sc_ekeys_allocated) {
 		/* We have all the keys, so avoid some overhead. */
 		key = RB_FIND(g_eli_key_tree, &sc->sc_ekeys_tree, &keysearch);
 		KASSERT(key != NULL, ("No key %ju found.", (uintmax_t)keyno));
 		KASSERT(key->gek_magic == G_ELI_KEY_MAGIC,
 		    ("Invalid key magic."));
 		return (key->gek_key);
 	}
 
 	mtx_lock(&sc->sc_ekeys_lock);
 	key = RB_FIND(g_eli_key_tree, &sc->sc_ekeys_tree, &keysearch);
 	if (key != NULL) {
 		g_eli_key_cache_hits++;
 		TAILQ_REMOVE(&sc->sc_ekeys_queue, key, gek_next);
 		TAILQ_INSERT_TAIL(&sc->sc_ekeys_queue, key, gek_next);
 	} else {
 		/*
 		 * No key in cache, find the least recently unreferenced key
 		 * or allocate one if we haven't reached our limit yet.
 		 */
 		if (sc->sc_ekeys_allocated < g_eli_key_cache_limit) {
 			key = g_eli_key_allocate(sc, keyno);
 		} else {
 			g_eli_key_cache_misses++;
 			key = g_eli_key_find_last(sc);
 			if (key != NULL) {
 				g_eli_key_replace(sc, key, keyno);
 			} else {
 				/* All keys are referenced? Allocate one. */
 				key = g_eli_key_allocate(sc, keyno);
 			}
 		}
 	}
 	key->gek_count++;
 	mtx_unlock(&sc->sc_ekeys_lock);
 
 	KASSERT(key->gek_magic == G_ELI_KEY_MAGIC, ("Invalid key magic."));
 
 	return (key->gek_key);
 }
 
 void
 g_eli_key_drop(struct g_eli_softc *sc, uint8_t *rawkey)
 {
 	struct g_eli_key *key = (struct g_eli_key *)rawkey;
 
 	if ((sc->sc_flags & G_ELI_FLAG_SINGLE_KEY) != 0)
 		return;
 
 	KASSERT(key->gek_magic == G_ELI_KEY_MAGIC, ("Invalid key magic."));
 
 	if (sc->sc_ekeys_total == sc->sc_ekeys_allocated)
 		return;
 
 	mtx_lock(&sc->sc_ekeys_lock);
 	KASSERT(key->gek_count > 0, ("key->gek_count=%d", key->gek_count));
 	key->gek_count--;
 	while (sc->sc_ekeys_allocated > g_eli_key_cache_limit) {
 		key = g_eli_key_find_last(sc);
 		if (key == NULL)
 			break;
 		g_eli_key_remove(sc, key);
 	}
 	mtx_unlock(&sc->sc_ekeys_lock);
 }
+#endif /* _KERNEL */
Index: projects/release-pkg/sys/geom/eli/pkcs5v2.c
===================================================================
--- projects/release-pkg/sys/geom/eli/pkcs5v2.c	(revision 293335)
+++ projects/release-pkg/sys/geom/eli/pkcs5v2.c	(revision 293336)
@@ -1,123 +1,125 @@
 /*-
  * Copyright (c) 2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #ifdef _KERNEL
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #else
 #include <sys/resource.h>
 #include <stdint.h>
 #include <strings.h>
 #endif
 
 #include <geom/eli/g_eli.h>
 #include <geom/eli/pkcs5v2.h>
 
 static __inline void
 xor(uint8_t *dst, const uint8_t *src, size_t size)
 {
 
 	for (; size > 0; size--)
 		*dst++ ^= *src++;
 }
 
 void
 pkcs5v2_genkey(uint8_t *key, unsigned keylen, const uint8_t *salt,
     size_t saltsize, const char *passphrase, u_int iterations)
 {
 	uint8_t md[SHA512_MDLEN], saltcount[saltsize + sizeof(uint32_t)];
 	uint8_t *counter, *keyp;
 	u_int i, bsize, passlen;
 	uint32_t count;
 
 	passlen = strlen(passphrase);
 	bzero(key, keylen);
 	bcopy(salt, saltcount, saltsize);
 	counter = saltcount + saltsize;
 
 	keyp = key;
 	for (count = 1; keylen > 0; count++, keylen -= bsize, keyp += bsize) {
 		bsize = MIN(keylen, sizeof(md));
 
 		counter[0] = (count >> 24) & 0xff;
 		counter[1] = (count >> 16) & 0xff;
 		counter[2] = (count >> 8) & 0xff;
 		counter[3] = count & 0xff;
 		g_eli_crypto_hmac(passphrase, passlen, saltcount,
 		    sizeof(saltcount), md, 0);
 		xor(keyp, md, bsize);
 
 		for(i = 1; i < iterations; i++) {
 			g_eli_crypto_hmac(passphrase, passlen, md, sizeof(md),
 			    md, 0);
 			xor(keyp, md, bsize);
 		}
 	}
 }
 
 #ifndef _KERNEL
+#ifndef _STAND
 /*
  * Return the number of microseconds needed for 'interations' iterations.
  */
 static int
 pkcs5v2_probe(int iterations)
 {
 	uint8_t	key[G_ELI_USERKEYLEN], salt[G_ELI_SALTLEN];
 	uint8_t passphrase[] = "passphrase";
 	struct rusage start, end;
 	int usecs;
 
 	getrusage(RUSAGE_SELF, &start);
 	pkcs5v2_genkey(key, sizeof(key), salt, sizeof(salt), passphrase,
 	    iterations);
 	getrusage(RUSAGE_SELF, &end);
 
 	usecs = end.ru_utime.tv_sec - start.ru_utime.tv_sec;
 	usecs *= 1000000;
 	usecs += end.ru_utime.tv_usec - start.ru_utime.tv_usec;
 	return (usecs);
 }
 
 /*
  * Return the number of iterations which takes 'usecs' microseconds.
  */
 int
 pkcs5v2_calculate(int usecs)
 {
 	int iterations, v;
 
 	for (iterations = 1; ; iterations <<= 1) {
 		v = pkcs5v2_probe(iterations);
 		if (v > 2000000)
 			break;
 	}
 	return (((intmax_t)iterations * (intmax_t)usecs) / v);
 }
+#endif	/* !_STAND */
 #endif	/* !_KERNEL */
Index: projects/release-pkg/sys/kern/vfs_cache.c
===================================================================
--- projects/release-pkg/sys/kern/vfs_cache.c	(revision 293335)
+++ projects/release-pkg/sys/kern/vfs_cache.c	(revision 293336)
@@ -1,1518 +1,1497 @@
 /*-
  * Copyright (c) 1989, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Poul-Henning Kamp of the FreeBSD Project.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/filedesc.h>
 #include <sys/fnv_hash.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/fcntl.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <vm/uma.h>
 
 SDT_PROVIDER_DECLARE(vfs);
 SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *",
     "char *", "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int",
     "struct vnode *", "char *");
 SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit__negative,
     "struct vnode *", "char *");
 SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE1(vfs, namecache, purge, done, "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *");
 SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *",
     "char *");
 
 /*
  * This structure describes the elements in the cache of recent
  * names looked up by namei.
  */
 
 struct	namecache {
 	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
 	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
 	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
 	struct	vnode *nc_dvp;		/* vnode of parent of name */
 	struct	vnode *nc_vp;		/* vnode the name refers to */
 	u_char	nc_flag;		/* flag bits */
 	u_char	nc_nlen;		/* length of name */
 	char	nc_name[0];		/* segment name + nul */
 };
 
 /*
  * struct namecache_ts repeats struct namecache layout up to the
  * nc_nlen member.
  * struct namecache_ts is used in place of struct namecache when time(s) need
  * to be stored.  The nc_dotdottime field is used when a cache entry is mapping
  * both a non-dotdot directory name plus dotdot for the directory's
  * parent.
  */
 struct	namecache_ts {
 	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
 	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
 	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
 	struct	vnode *nc_dvp;		/* vnode of parent of name */
 	struct	vnode *nc_vp;		/* vnode the name refers to */
 	u_char	nc_flag;		/* flag bits */
 	u_char	nc_nlen;		/* length of name */
 	struct	timespec nc_time;	/* timespec provided by fs */
 	struct	timespec nc_dotdottime;	/* dotdot timespec provided by fs */
 	int	nc_ticks;		/* ticks value when entry was added */
 	char	nc_name[0];		/* segment name + nul */
 };
 
 /*
  * Flags in namecache.nc_flag
  */
 #define NCF_WHITE	0x01
 #define NCF_ISDOTDOT	0x02
 #define	NCF_TS		0x04
 #define	NCF_DTS		0x08
 
 /*
  * Name caching works as follows:
  *
  * Names found by directory scans are retained in a cache
  * for future reference.  It is managed LRU, so frequently
  * used names will hang around.  Cache is indexed by hash value
  * obtained from (vp, name) where vp refers to the directory
  * containing name.
  *
  * If it is a "negative" entry, (i.e. for a name that is known NOT to
  * exist) the vnode pointer will be NULL.
  *
  * Upon reaching the last segment of a path, if the reference
  * is for DELETE, or NOCACHE is set (rewrite), and the
  * name is located in the cache, it will be dropped.
  */
 
 /*
  * Structures associated with name cacheing.
  */
 #define NCHHASH(hash) \
 	(&nchashtbl[(hash) & nchash])
 static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
 static TAILQ_HEAD(, namecache) ncneg;	/* Hash Table */
 static u_long	nchash;			/* size of hash table */
 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
     "Size of namecache hash table");
 static u_long	ncnegfactor = 16;	/* ratio of negative entries */
 SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0,
     "Ratio of negative namecache entries");
 static u_long	numneg;			/* number of negative entries allocated */
 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0,
     "Number of negative entries in namecache");
 static u_long	numcache;		/* number of cache entries allocated */
 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0,
     "Number of namecache entries");
 static u_long	numcachehv;		/* number of cache entries with vnodes held */
 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0,
     "Number of namecache entries with vnodes held");
 static u_int	ncsizefactor = 2;
 SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0,
     "Size factor for namecache");
 
 struct nchstats	nchstats;		/* cache effectiveness statistics */
 
 static struct rwlock cache_lock;
 RW_SYSINIT(vfscache, &cache_lock, "Name Cache");
 
 #define	CACHE_UPGRADE_LOCK()	rw_try_upgrade(&cache_lock)
 #define	CACHE_RLOCK()		rw_rlock(&cache_lock)
 #define	CACHE_RUNLOCK()		rw_runlock(&cache_lock)
 #define	CACHE_WLOCK()		rw_wlock(&cache_lock)
 #define	CACHE_WUNLOCK()		rw_wunlock(&cache_lock)
 
 /*
  * UMA zones for the VFS cache.
  *
  * The small cache is used for entries with short names, which are the
  * most common.  The large cache is used for entries which are too big to
  * fit in the small cache.
  */
 static uma_zone_t cache_zone_small;
 static uma_zone_t cache_zone_small_ts;
 static uma_zone_t cache_zone_large;
 static uma_zone_t cache_zone_large_ts;
 
 #define	CACHE_PATH_CUTOFF	35
 
 static struct namecache *
 cache_alloc(int len, int ts)
 {
 
 	if (len > CACHE_PATH_CUTOFF) {
 		if (ts)
 			return (uma_zalloc(cache_zone_large_ts, M_WAITOK));
 		else
 			return (uma_zalloc(cache_zone_large, M_WAITOK));
 	}
 	if (ts)
 		return (uma_zalloc(cache_zone_small_ts, M_WAITOK));
 	else
 		return (uma_zalloc(cache_zone_small, M_WAITOK));
 }
 
 static void
 cache_free(struct namecache *ncp)
 {
 	int ts;
 
 	if (ncp == NULL)
 		return;
 	ts = ncp->nc_flag & NCF_TS;
 	if (ncp->nc_nlen <= CACHE_PATH_CUTOFF) {
 		if (ts)
 			uma_zfree(cache_zone_small_ts, ncp);
 		else
 			uma_zfree(cache_zone_small, ncp);
 	} else if (ts)
 		uma_zfree(cache_zone_large_ts, ncp);
 	else
 		uma_zfree(cache_zone_large, ncp);
 }
 
 static char *
 nc_get_name(struct namecache *ncp)
 {
 	struct namecache_ts *ncp_ts;
 
 	if ((ncp->nc_flag & NCF_TS) == 0)
 		return (ncp->nc_name);
 	ncp_ts = (struct namecache_ts *)ncp;
 	return (ncp_ts->nc_name);
 }
 
 static void
 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
 {
 
 	KASSERT((ncp->nc_flag & NCF_TS) != 0 ||
 	    (tsp == NULL && ticksp == NULL),
 	    ("No NCF_TS"));
 
 	if (tsp != NULL)
 		*tsp = ((struct namecache_ts *)ncp)->nc_time;
 	if (ticksp != NULL)
 		*ticksp = ((struct namecache_ts *)ncp)->nc_ticks;
 }
 
 static int	doingcache = 1;		/* 1 => enable the cache */
 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0,
     "VFS namecache enabled");
 
 /* Export size information to userland */
 SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, SYSCTL_NULL_INT_PTR,
     sizeof(struct namecache), "sizeof(struct namecache)");
 
 /*
  * The new name cache statistics
  */
 static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0,
     "Name cache statistics");
 #define STATNODE(mode, name, var, descr) \
 	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, descr);
 STATNODE(CTLFLAG_RD, numneg, &numneg, "Number of negative cache entries");
 STATNODE(CTLFLAG_RD, numcache, &numcache, "Number of cache entries");
 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls,
     "Number of cache lookups");
 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits,
     "Number of '.' hits");
 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits,
     "Number of '..' hits");
 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks,
     "Number of checks in lookup");
 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss,
     "Number of cache misses");
 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap,
     "Number of cache misses we do not want to cache");
-static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps, 
+static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps,
     "Number of cache hits (positive) we do not want to cache");
 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits,
     "Number of cache hits (positive)");
 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps,
     "Number of cache hits (negative) we do not want to cache");
 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits,
     "Number of cache hits (negative)");
 static u_long numupgrades; STATNODE(CTLFLAG_RD, numupgrades, &numupgrades,
     "Number of updates of the cache after lookup (write lock + retry)");
 
 SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD | CTLFLAG_MPSAFE,
     &nchstats, sizeof(nchstats), "LU",
     "VFS cache effectiveness statistics");
 
-
-
 static void cache_zap(struct namecache *ncp);
 static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
     u_int *buflen);
 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
     char *buf, char **retbuf, u_int buflen);
 
 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
 
 #ifdef DIAGNOSTIC
 /*
  * Grab an atomic snapshot of the name cache hash chain lengths
  */
 static SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL,
     "hash table stats");
 
 static int
 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
 {
 	struct nchashhead *ncpp;
 	struct namecache *ncp;
 	int i, error, n_nchash, *cntbuf;
 
 retry:
 	n_nchash = nchash + 1;	/* nchash is max index, not count */
 	if (req->oldptr == NULL)
 		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
 	cntbuf = malloc(n_nchash * sizeof(int), M_TEMP, M_ZERO | M_WAITOK);
 	CACHE_RLOCK();
 	if (n_nchash != nchash + 1) {
 		CACHE_RUNLOCK();
 		free(cntbuf, M_TEMP);
 		goto retry;
 	}
 	/* Scan hash tables counting entries */
 	for (ncpp = nchashtbl, i = 0; i < n_nchash; ncpp++, i++)
 		LIST_FOREACH(ncp, ncpp, nc_hash)
 			cntbuf[i]++;
 	CACHE_RUNLOCK();
 	for (error = 0, i = 0; i < n_nchash; i++)
 		if ((error = SYSCTL_OUT(req, &cntbuf[i], sizeof(int))) != 0)
 			break;
 	free(cntbuf, M_TEMP);
 	return (error);
 }
 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
     CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
     "nchash chain lengths");
 
 static int
 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct nchashhead *ncpp;
 	struct namecache *ncp;
 	int n_nchash;
 	int count, maxlength, used, pct;
 
 	if (!req->oldptr)
 		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
 
 	CACHE_RLOCK();
 	n_nchash = nchash + 1;	/* nchash is max index, not count */
 	used = 0;
 	maxlength = 0;
 
 	/* Scan hash tables for applicable entries */
 	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
 		count = 0;
 		LIST_FOREACH(ncp, ncpp, nc_hash) {
 			count++;
 		}
 		if (count)
 			used++;
 		if (maxlength < count)
 			maxlength = count;
 	}
 	n_nchash = nchash + 1;
 	CACHE_RUNLOCK();
 	pct = (used * 100) / (n_nchash / 100);
 	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &used, sizeof(used));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &pct, sizeof(pct));
 	if (error)
 		return (error);
 	return (0);
 }
 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
     CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
     "nchash statistics (number of total/used buckets, maximum chain length, usage percentage)");
 #endif
 
 /*
  * cache_zap():
  *
  *   Removes a namecache entry from cache, whether it contains an actual
  *   pointer to a vnode or if it is just a negative cache entry.
  */
 static void
-cache_zap(ncp)
-	struct namecache *ncp;
+cache_zap(struct namecache *ncp)
 {
 	struct vnode *vp;
 
 	rw_assert(&cache_lock, RA_WLOCKED);
 	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
 	if (ncp->nc_vp != NULL) {
 		SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
 		    nc_get_name(ncp), ncp->nc_vp);
 	} else {
 		SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp,
 		    nc_get_name(ncp));
 	}
 	vp = NULL;
 	LIST_REMOVE(ncp, nc_hash);
 	if (ncp->nc_flag & NCF_ISDOTDOT) {
 		if (ncp == ncp->nc_dvp->v_cache_dd)
 			ncp->nc_dvp->v_cache_dd = NULL;
 	} else {
 		LIST_REMOVE(ncp, nc_src);
 		if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
 			vp = ncp->nc_dvp;
 			numcachehv--;
 		}
 	}
 	if (ncp->nc_vp) {
 		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
 		if (ncp == ncp->nc_vp->v_cache_dd)
 			ncp->nc_vp->v_cache_dd = NULL;
 	} else {
 		TAILQ_REMOVE(&ncneg, ncp, nc_dst);
 		numneg--;
 	}
 	numcache--;
 	cache_free(ncp);
-	if (vp)
+	if (vp != NULL)
 		vdrop(vp);
 }
 
 /*
  * Lookup an entry in the cache
  *
  * Lookup is called with dvp pointing to the directory to search,
  * cnp pointing to the name of the entry being sought. If the lookup
  * succeeds, the vnode is returned in *vpp, and a status of -1 is
  * returned. If the lookup determines that the name does not exist
  * (negative cacheing), a status of ENOENT is returned. If the lookup
  * fails, a status of zero is returned.  If the directory vnode is
  * recycled out from under us due to a forced unmount, a status of
  * ENOENT is returned.
  *
  * vpp is locked and ref'd on return.  If we're looking up DOTDOT, dvp is
  * unlocked.  If we're looking up . an extra ref is taken, but the lock is
  * not recursively acquired.
  */
 
 int
-cache_lookup(dvp, vpp, cnp, tsp, ticksp)
-	struct vnode *dvp;
-	struct vnode **vpp;
-	struct componentname *cnp;
-	struct timespec *tsp;
-	int *ticksp;
+cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
+    struct timespec *tsp, int *ticksp)
 {
 	struct namecache *ncp;
 	uint32_t hash;
 	int error, ltype, wlocked;
 
 	if (!doingcache) {
 		cnp->cn_flags &= ~MAKEENTRY;
 		return (0);
 	}
 retry:
 	CACHE_RLOCK();
 	wlocked = 0;
 	numcalls++;
 	error = 0;
 
 retry_wlocked:
 	if (cnp->cn_nameptr[0] == '.') {
 		if (cnp->cn_namelen == 1) {
 			*vpp = dvp;
 			CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .",
 			    dvp, cnp->cn_nameptr);
 			dothits++;
 			SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ".", *vpp);
 			if (tsp != NULL)
 				timespecclear(tsp);
 			if (ticksp != NULL)
 				*ticksp = ticks;
 			goto success;
 		}
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
 			dotdothits++;
 			if (dvp->v_cache_dd == NULL) {
 				SDT_PROBE3(vfs, namecache, lookup, miss, dvp,
 				    "..", NULL);
 				goto unlock;
 			}
 			if ((cnp->cn_flags & MAKEENTRY) == 0) {
 				if (!wlocked && !CACHE_UPGRADE_LOCK())
 					goto wlock;
 				if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT)
 					cache_zap(dvp->v_cache_dd);
 				dvp->v_cache_dd = NULL;
 				CACHE_WUNLOCK();
 				return (0);
 			}
 			ncp = dvp->v_cache_dd;
 			if (ncp->nc_flag & NCF_ISDOTDOT)
 				*vpp = ncp->nc_vp;
 			else
 				*vpp = ncp->nc_dvp;
 			/* Return failure if negative entry was found. */
 			if (*vpp == NULL)
 				goto negative_success;
 			CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
 			    dvp, cnp->cn_nameptr, *vpp);
 			SDT_PROBE3(vfs, namecache, lookup, hit, dvp, "..",
 			    *vpp);
 			cache_out_ts(ncp, tsp, ticksp);
 			if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) ==
 			    NCF_DTS && tsp != NULL)
 				*tsp = ((struct namecache_ts *)ncp)->
 				    nc_dotdottime;
 			goto success;
 		}
 	}
 
 	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
 	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
 	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		numchecks++;
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(nc_get_name(ncp), cnp->cn_nameptr, ncp->nc_nlen))
 			break;
 	}
 
 	/* We failed to find an entry */
 	if (ncp == NULL) {
 		SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
 		    NULL);
 		if ((cnp->cn_flags & MAKEENTRY) == 0) {
 			nummisszap++;
 		} else {
 			nummiss++;
 		}
 		nchstats.ncs_miss++;
 		goto unlock;
 	}
 
 	/* We don't want to have an entry, so dump it */
 	if ((cnp->cn_flags & MAKEENTRY) == 0) {
 		numposzaps++;
 		nchstats.ncs_badhits++;
 		if (!wlocked && !CACHE_UPGRADE_LOCK())
 			goto wlock;
 		cache_zap(ncp);
 		CACHE_WUNLOCK();
 		return (0);
 	}
 
 	/* We found a "positive" match, return the vnode */
 	if (ncp->nc_vp) {
 		numposhits++;
 		nchstats.ncs_goodhits++;
 		*vpp = ncp->nc_vp;
 		CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
 		    dvp, cnp->cn_nameptr, *vpp, ncp);
 		SDT_PROBE3(vfs, namecache, lookup, hit, dvp, nc_get_name(ncp),
 		    *vpp);
 		cache_out_ts(ncp, tsp, ticksp);
 		goto success;
 	}
 
 negative_success:
 	/* We found a negative match, and want to create it, so purge */
 	if (cnp->cn_nameiop == CREATE) {
 		numnegzaps++;
 		nchstats.ncs_badhits++;
 		if (!wlocked && !CACHE_UPGRADE_LOCK())
 			goto wlock;
 		cache_zap(ncp);
 		CACHE_WUNLOCK();
 		return (0);
 	}
 
 	if (!wlocked && !CACHE_UPGRADE_LOCK())
 		goto wlock;
 	numneghits++;
 	/*
 	 * We found a "negative" match, so we shift it to the end of
 	 * the "negative" cache entries queue to satisfy LRU.  Also,
 	 * check to see if the entry is a whiteout; indicate this to
 	 * the componentname, if so.
 	 */
 	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
 	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
 	nchstats.ncs_neghits++;
 	if (ncp->nc_flag & NCF_WHITE)
 		cnp->cn_flags |= ISWHITEOUT;
 	SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
 	    nc_get_name(ncp));
 	cache_out_ts(ncp, tsp, ticksp);
 	CACHE_WUNLOCK();
 	return (ENOENT);
 
 wlock:
 	/*
 	 * We need to update the cache after our lookup, so upgrade to
 	 * a write lock and retry the operation.
 	 */
 	CACHE_RUNLOCK();
 	CACHE_WLOCK();
 	numupgrades++;
 	wlocked = 1;
 	goto retry_wlocked;
 
 success:
 	/*
 	 * On success we return a locked and ref'd vnode as per the lookup
 	 * protocol.
 	 */
 	if (dvp == *vpp) {   /* lookup on "." */
 		VREF(*vpp);
 		if (wlocked)
 			CACHE_WUNLOCK();
 		else
 			CACHE_RUNLOCK();
 		/*
 		 * When we lookup "." we still can be asked to lock it
 		 * differently...
 		 */
 		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
 		if (ltype != VOP_ISLOCKED(*vpp)) {
 			if (ltype == LK_EXCLUSIVE) {
 				vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
 				if ((*vpp)->v_iflag & VI_DOOMED) {
 					/* forced unmount */
 					vrele(*vpp);
 					*vpp = NULL;
 					return (ENOENT);
 				}
 			} else
 				vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
 		}
 		return (-1);
 	}
 	ltype = 0;	/* silence gcc warning */
 	if (cnp->cn_flags & ISDOTDOT) {
 		ltype = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp, 0);
 	}
 	vhold(*vpp);
 	if (wlocked)
 		CACHE_WUNLOCK();
 	else
 		CACHE_RUNLOCK();
 	error = vget(*vpp, cnp->cn_lkflags | LK_VNHELD, cnp->cn_thread);
 	if (cnp->cn_flags & ISDOTDOT) {
 		vn_lock(dvp, ltype | LK_RETRY);
 		if (dvp->v_iflag & VI_DOOMED) {
 			if (error == 0)
 				vput(*vpp);
 			*vpp = NULL;
 			return (ENOENT);
 		}
 	}
 	if (error) {
 		*vpp = NULL;
 		goto retry;
 	}
 	if ((cnp->cn_flags & ISLASTCN) &&
 	    (cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) {
 		ASSERT_VOP_ELOCKED(*vpp, "cache_lookup");
 	}
 	return (-1);
 
 unlock:
 	if (wlocked)
 		CACHE_WUNLOCK();
 	else
 		CACHE_RUNLOCK();
 	return (0);
 }
 
 /*
  * Add an entry to the cache.
  */
 void
-cache_enter_time(dvp, vp, cnp, tsp, dtsp)
-	struct vnode *dvp;
-	struct vnode *vp;
-	struct componentname *cnp;
-	struct timespec *tsp;
-	struct timespec *dtsp;
+cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
+    struct timespec *tsp, struct timespec *dtsp)
 {
 	struct namecache *ncp, *n2;
 	struct namecache_ts *n3;
 	struct nchashhead *ncpp;
 	uint32_t hash;
 	int flag;
 	int hold;
 	int zap;
 	int len;
 
 	CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
 	VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
 	    ("cache_enter: Adding a doomed vnode"));
 	VNASSERT(dvp == NULL || (dvp->v_iflag & VI_DOOMED) == 0, dvp,
 	    ("cache_enter: Doomed vnode used as src"));
 
 	if (!doingcache)
 		return;
 
 	/*
 	 * Avoid blowout in namecache entries.
 	 */
 	if (numcache >= desiredvnodes * ncsizefactor)
 		return;
 
 	flag = 0;
 	if (cnp->cn_nameptr[0] == '.') {
 		if (cnp->cn_namelen == 1)
 			return;
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
 			CACHE_WLOCK();
 			/*
 			 * If dotdot entry already exists, just retarget it
 			 * to new parent vnode, otherwise continue with new
 			 * namecache entry allocation.
 			 */
 			if ((ncp = dvp->v_cache_dd) != NULL &&
 			    ncp->nc_flag & NCF_ISDOTDOT) {
 				KASSERT(ncp->nc_dvp == dvp,
 				    ("wrong isdotdot parent"));
 				if (ncp->nc_vp != NULL) {
 					TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
 					    ncp, nc_dst);
 				} else {
 					TAILQ_REMOVE(&ncneg, ncp, nc_dst);
 					numneg--;
 				}
 				if (vp != NULL) {
 					TAILQ_INSERT_HEAD(&vp->v_cache_dst,
 					    ncp, nc_dst);
 				} else {
 					TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
 					numneg++;
 				}
 				ncp->nc_vp = vp;
 				CACHE_WUNLOCK();
 				return;
 			}
 			dvp->v_cache_dd = NULL;
 			SDT_PROBE3(vfs, namecache, enter, done, dvp, "..", vp);
 			CACHE_WUNLOCK();
 			flag = NCF_ISDOTDOT;
 		}
 	}
 
 	hold = 0;
 	zap = 0;
 
 	/*
 	 * Calculate the hash key and setup as much of the new
 	 * namecache entry as possible before acquiring the lock.
 	 */
 	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
 	ncp->nc_vp = vp;
 	ncp->nc_dvp = dvp;
 	ncp->nc_flag = flag;
 	if (tsp != NULL) {
 		n3 = (struct namecache_ts *)ncp;
 		n3->nc_time = *tsp;
 		n3->nc_ticks = ticks;
 		n3->nc_flag |= NCF_TS;
 		if (dtsp != NULL) {
 			n3->nc_dotdottime = *dtsp;
 			n3->nc_flag |= NCF_DTS;
 		}
 	}
 	len = ncp->nc_nlen = cnp->cn_namelen;
 	hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
 	strlcpy(nc_get_name(ncp), cnp->cn_nameptr, len + 1);
 	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
 	CACHE_WLOCK();
 
 	/*
 	 * See if this vnode or negative entry is already in the cache
 	 * with this name.  This can happen with concurrent lookups of
 	 * the same path name.
 	 */
 	ncpp = NCHHASH(hash);
 	LIST_FOREACH(n2, ncpp, nc_hash) {
 		if (n2->nc_dvp == dvp &&
 		    n2->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(nc_get_name(n2), cnp->cn_nameptr, n2->nc_nlen)) {
 			if (tsp != NULL) {
 				KASSERT((n2->nc_flag & NCF_TS) != 0,
 				    ("no NCF_TS"));
 				n3 = (struct namecache_ts *)n2;
 				n3->nc_time =
 				    ((struct namecache_ts *)ncp)->nc_time;
 				n3->nc_ticks =
 				    ((struct namecache_ts *)ncp)->nc_ticks;
 				if (dtsp != NULL) {
 					n3->nc_dotdottime =
 					    ((struct namecache_ts *)ncp)->
 					    nc_dotdottime;
 					n3->nc_flag |= NCF_DTS;
 				}
 			}
 			CACHE_WUNLOCK();
 			cache_free(ncp);
 			return;
 		}
 	}
 
 	if (flag == NCF_ISDOTDOT) {
 		/*
 		 * See if we are trying to add .. entry, but some other lookup
 		 * has populated v_cache_dd pointer already.
 		 */
 		if (dvp->v_cache_dd != NULL) {
-		    CACHE_WUNLOCK();
-		    cache_free(ncp);
-		    return;
+			CACHE_WUNLOCK();
+			cache_free(ncp);
+			return;
 		}
 		KASSERT(vp == NULL || vp->v_type == VDIR,
 		    ("wrong vnode type %p", vp));
 		dvp->v_cache_dd = ncp;
 	}
 
 	numcache++;
-	if (!vp) {
+	if (vp == NULL) {
 		numneg++;
 		if (cnp->cn_flags & ISWHITEOUT)
 			ncp->nc_flag |= NCF_WHITE;
 	} else if (vp->v_type == VDIR) {
 		if (flag != NCF_ISDOTDOT) {
 			/*
 			 * For this case, the cache entry maps both the
 			 * directory name in it and the name ".." for the
 			 * directory's parent.
 			 */
 			if ((n2 = vp->v_cache_dd) != NULL &&
 			    (n2->nc_flag & NCF_ISDOTDOT) != 0)
 				cache_zap(n2);
 			vp->v_cache_dd = ncp;
 		}
 	} else {
 		vp->v_cache_dd = NULL;
 	}
 
 	/*
 	 * Insert the new namecache entry into the appropriate chain
 	 * within the cache entries table.
 	 */
 	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
 	if (flag != NCF_ISDOTDOT) {
 		if (LIST_EMPTY(&dvp->v_cache_src)) {
 			hold = 1;
 			numcachehv++;
 		}
 		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
 	}
 
 	/*
 	 * If the entry is "negative", we place it into the
 	 * "negative" cache queue, otherwise, we place it into the
 	 * destination vnode's cache entries queue.
 	 */
-	if (vp) {
+	if (vp != NULL) {
 		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
 		SDT_PROBE3(vfs, namecache, enter, done, dvp, nc_get_name(ncp),
 		    vp);
 	} else {
 		TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
 		SDT_PROBE2(vfs, namecache, enter_negative, done, dvp,
 		    nc_get_name(ncp));
 	}
 	if (numneg * ncnegfactor > numcache) {
 		ncp = TAILQ_FIRST(&ncneg);
 		KASSERT(ncp->nc_vp == NULL, ("ncp %p vp %p on ncneg",
 		    ncp, ncp->nc_vp));
 		zap = 1;
 	}
 	if (hold)
 		vhold(dvp);
 	if (zap)
 		cache_zap(ncp);
 	CACHE_WUNLOCK();
 }
 
 /*
  * Name cache initialization, from vfs_init() when we are booting
  */
 static void
 nchinit(void *dummy __unused)
 {
 
 	TAILQ_INIT(&ncneg);
 
 	cache_zone_small = uma_zcreate("S VFS Cache",
 	    sizeof(struct namecache) + CACHE_PATH_CUTOFF + 1,
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 	cache_zone_small_ts = uma_zcreate("STS VFS Cache",
 	    sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1,
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 	cache_zone_large = uma_zcreate("L VFS Cache",
 	    sizeof(struct namecache) + NAME_MAX + 1,
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 	cache_zone_large_ts = uma_zcreate("LTS VFS Cache",
 	    sizeof(struct namecache_ts) + NAME_MAX + 1,
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
 
 	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
 
 void
 cache_changesize(int newmaxvnodes)
 {
 	struct nchashhead *new_nchashtbl, *old_nchashtbl;
 	u_long new_nchash, old_nchash;
 	struct namecache *ncp;
 	uint32_t hash;
 	int i;
 
 	new_nchashtbl = hashinit(newmaxvnodes * 2, M_VFSCACHE, &new_nchash);
 	/* If same hash table size, nothing to do */
 	if (nchash == new_nchash) {
 		free(new_nchashtbl, M_VFSCACHE);
 		return;
 	}
 	/*
 	 * Move everything from the old hash table to the new table.
 	 * None of the namecache entries in the table can be removed
 	 * because to do so, they have to be removed from the hash table.
 	 */
 	CACHE_WLOCK();
 	old_nchashtbl = nchashtbl;
 	old_nchash = nchash;
 	nchashtbl = new_nchashtbl;
 	nchash = new_nchash;
 	for (i = 0; i <= old_nchash; i++) {
 		while ((ncp = LIST_FIRST(&old_nchashtbl[i])) != NULL) {
 			hash = fnv_32_buf(nc_get_name(ncp), ncp->nc_nlen,
 			    FNV1_32_INIT);
 			hash = fnv_32_buf(&ncp->nc_dvp, sizeof(ncp->nc_dvp),
 			    hash);
 			LIST_REMOVE(ncp, nc_hash);
 			LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
 		}
 	}
 	CACHE_WUNLOCK();
 	free(old_nchashtbl, M_VFSCACHE);
 }
 
 /*
  * Invalidate all entries to a particular vnode.
  */
 void
-cache_purge(vp)
-	struct vnode *vp;
+cache_purge(struct vnode *vp)
 {
 
 	CTR1(KTR_VFS, "cache_purge(%p)", vp);
 	SDT_PROBE1(vfs, namecache, purge, done, vp);
 	CACHE_WLOCK();
 	while (!LIST_EMPTY(&vp->v_cache_src))
 		cache_zap(LIST_FIRST(&vp->v_cache_src));
 	while (!TAILQ_EMPTY(&vp->v_cache_dst))
 		cache_zap(TAILQ_FIRST(&vp->v_cache_dst));
 	if (vp->v_cache_dd != NULL) {
 		KASSERT(vp->v_cache_dd->nc_flag & NCF_ISDOTDOT,
 		   ("lost dotdot link"));
 		cache_zap(vp->v_cache_dd);
 	}
 	KASSERT(vp->v_cache_dd == NULL, ("incomplete purge"));
 	CACHE_WUNLOCK();
 }
 
 /*
  * Invalidate all negative entries for a particular directory vnode.
  */
 void
-cache_purge_negative(vp)
-	struct vnode *vp;
+cache_purge_negative(struct vnode *vp)
 {
 	struct namecache *cp, *ncp;
 
 	CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
 	SDT_PROBE1(vfs, namecache, purge_negative, done, vp);
 	CACHE_WLOCK();
 	LIST_FOREACH_SAFE(cp, &vp->v_cache_src, nc_src, ncp) {
 		if (cp->nc_vp == NULL)
 			cache_zap(cp);
 	}
 	CACHE_WUNLOCK();
 }
 
 /*
  * Flush all entries referencing a particular filesystem.
  */
 void
-cache_purgevfs(mp)
-	struct mount *mp;
+cache_purgevfs(struct mount *mp)
 {
 	struct nchashhead *ncpp;
 	struct namecache *ncp, *nnp;
 
 	/* Scan hash tables for applicable entries */
 	SDT_PROBE1(vfs, namecache, purgevfs, done, mp);
 	CACHE_WLOCK();
 	for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
 		LIST_FOREACH_SAFE(ncp, ncpp, nc_hash, nnp) {
 			if (ncp->nc_dvp->v_mount == mp)
 				cache_zap(ncp);
 		}
 	}
 	CACHE_WUNLOCK();
 }
 
 /*
  * Perform canonical checks and cache lookup and pass on to filesystem
  * through the vop_cachedlookup only if needed.
  *
  * Returns ENOTDIR if a_dvp is not a directory, EROFS for a DELETE or RENAME
  * of the last component on a read-only mount, or the error from
  * VOP_ACCESS(VEXEC).  Otherwise the result depends on cache_lookup():
  * 0 falls through to VOP_CACHEDLOOKUP(), -1 is turned into success (0),
  * and any other value is handed back to the caller as the error.
  */
 
 int
-vfs_cache_lookup(ap)
-	struct vop_lookup_args /* {
-		struct vnode *a_dvp;
-		struct vnode **a_vpp;
-		struct componentname *a_cnp;
-	} */ *ap;
+vfs_cache_lookup(struct vop_lookup_args *ap)
 {
 	struct vnode *dvp;
 	int error;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct ucred *cred = cnp->cn_cred;
 	int flags = cnp->cn_flags;
 	struct thread *td = cnp->cn_thread;
 
 	/* The result slot is cleared before any check can fail. */
 	*vpp = NULL;
 	dvp = ap->a_dvp;
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 
 	/* Modifying the final component is refused on a read-only mount. */
 	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 
 	/* Search permission on the directory is required. */
 	error = VOP_ACCESS(dvp, VEXEC, cred, td);
 	if (error)
 		return (error);
 
 	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
 	/* 0: nothing usable in the cache, ask the filesystem. */
 	if (error == 0)
 		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
 	/*
 	 * -1 is reported as success; presumably a cache hit with *vpp
 	 * filled in by cache_lookup() -- confirm against cache_lookup().
 	 */
 	if (error == -1)
 		return (0);
 	return (error);
 }
 
 /*
  * XXX All of these sysctls would probably be more productive dead.
  */
 /* debug.disablecwd: when non-zero, kern___getcwd() fails with ENODEV. */
 static int disablecwd;
 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0,
    "Disable the getcwd syscall");
 
 /*
  * Implementation of the getcwd syscall.
  *
  * Thin wrapper: forwards the user buffer and its length to kern___getcwd()
  * with UIO_USERSPACE as the buffer segment and MAXPATHLEN as the cap on the
  * path length, and returns whatever kern___getcwd() returns.
  */
 int
-sys___getcwd(td, uap)
-	struct thread *td;
-	struct __getcwd_args *uap;
+sys___getcwd(struct thread *td, struct __getcwd_args *uap)
 {
 
 	return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen,
 	    MAXPATHLEN));
 }
 
 /*
  * Kernel side of getcwd: resolve the current directory of td's process to
  * a path, stopping at the process root directory, and copy it (including
  * the terminating NUL) into buf.
  *
  * bufseg selects the copy method: bcopy() for UIO_SYSSPACE, copyout()
  * otherwise.  buflen is clamped to path_max.  Returns ENODEV when
  * debug.disablecwd is set, EINVAL when buflen < 2, and otherwise the
  * result of vn_fullpath1() or of the copyout().
  */
 int
 kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg, u_int buflen,
     u_int path_max)
 {
 	struct filedesc *fdp;
 	struct vnode *cdir, *rdir;
 	char *bp, *tmpbuf;
 	size_t len;
 	int error;
 
 	if (disablecwd)
 		return (ENODEV);
 	if (buflen < 2)
 		return (EINVAL);
 	if (buflen > path_max)
 		buflen = path_max;
 
 	tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
 
 	/* Take references on cwd and root while the table is share-locked. */
 	fdp = td->td_proc->p_fd;
 	FILEDESC_SLOCK(fdp);
 	cdir = fdp->fd_cdir;
 	VREF(cdir);
 	rdir = fdp->fd_rdir;
 	VREF(rdir);
 	FILEDESC_SUNLOCK(fdp);
 
 	error = vn_fullpath1(td, cdir, rdir, tmpbuf, &bp, buflen);
 	vrele(rdir);
 	vrele(cdir);
 	if (error != 0)
 		goto out;
 
 	/* bp points into tmpbuf at the start of the assembled path. */
 	len = strlen(bp) + 1;
 	if (bufseg == UIO_SYSSPACE)
 		bcopy(bp, buf, len);
 	else
 		error = copyout(bp, buf, len);
 #ifdef KTRACE
 	if (KTRPOINT(curthread, KTR_NAMEI))
 		ktrnamei(bp);
 #endif
 out:
 	free(tmpbuf, M_TEMP);
 	return (error);
 }
 
 /*
  * Thus begins the fullpath magic.
  */
 
 /*
  * Redefine STATNODE for the fullpath statistics below: each use declares a
  * static u_int counter and a read-only (CTLFLAG_RD) sysctl of the same
  * name under _vfs_cache that exports it.
  */
 #undef STATNODE
 #define STATNODE(name, descr)						\
 	static u_int name;						\
 	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, descr)
 
 /*
  * debug.disablefullpath: when non-zero, vn_fullpath(), vn_fullpath_global()
  * and vn_path_to_global_path() return ENODEV.
  */
 static int disablefullpath;
 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0,
     "Disable the vn_fullpath function");
 
 /* These count for kern___getcwd(), too. */
 STATNODE(numfullpathcalls, "Number of fullpath search calls");
 STATNODE(numfullpathfail1, "Number of fullpath search errors (ENOTDIR)");
 STATNODE(numfullpathfail2,
     "Number of fullpath search errors (VOP_VPTOCNP failures)");
 STATNODE(numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
 STATNODE(numfullpathfound, "Number of successful fullpath calls");
 
 /*
  * Retrieve the full filesystem path that corresponds to a vnode, as seen
  * from the root directory of td's process.
  *
  * On success 0 is returned, *retbuf points at the path and *freebuf at the
  * MAXPATHLEN-sized M_TEMP allocation holding it, which the caller has to
  * free.  On failure the allocation is released here and *freebuf is left
  * untouched.  Returns ENODEV when debug.disablefullpath is set and EINVAL
  * for a NULL vnode.
  */
 int
 vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf)
 {
 	struct filedesc *fdp;
 	struct vnode *rdir;
 	char *buf;
 	int error;
 
 	if (disablefullpath)
 		return (ENODEV);
 	if (vn == NULL)
 		return (EINVAL);
 
 	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
 
 	/* Reference the process root while the table is share-locked. */
 	fdp = td->td_proc->p_fd;
 	FILEDESC_SLOCK(fdp);
 	rdir = fdp->fd_rdir;
 	VREF(rdir);
 	FILEDESC_SUNLOCK(fdp);
 
 	error = vn_fullpath1(td, vn, rdir, buf, retbuf, MAXPATHLEN);
 	vrele(rdir);
 
 	if (error != 0) {
 		free(buf, M_TEMP);
 		return (error);
 	}
 	*freebuf = buf;
 	return (error);
 }
 
 /*
  * Variant of vn_fullpath() that resolves the pathname relative to the
  * global root mount point (rootvnode) instead of the process root.  The
  * auditing sub-system needs this, since audited pathnames must be absolute
  * with respect to the global root.
  *
  * Buffer ownership follows vn_fullpath(): on success *freebuf receives the
  * M_TEMP allocation and the caller frees it; on failure it is freed here.
  */
 int
 vn_fullpath_global(struct thread *td, struct vnode *vn,
     char **retbuf, char **freebuf)
 {
 	char *buf;
 	int error;
 
 	if (disablefullpath)
 		return (ENODEV);
 	if (vn == NULL)
 		return (EINVAL);
 
 	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
 	error = vn_fullpath1(td, vn, rootvnode, buf, retbuf, MAXPATHLEN);
 	if (error != 0) {
 		free(buf, M_TEMP);
 		return (error);
 	}
 	*freebuf = buf;
 	return (error);
 }
 
 /*
  * Unlocked entry point for vn_vptocnp_locked(): takes the name cache read
  * lock, performs a single step and makes sure the lock is not held on
  * return.
  */
 int
 vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen)
 {
 	int error;
 
 	CACHE_RLOCK();
 	error = vn_vptocnp_locked(vp, cred, buf, buflen);
 	/* Every failure path of the helper has already dropped the lock. */
 	if (error != 0)
 		return (error);
 	CACHE_RUNLOCK();
 	return (error);
 }
 
 /*
  * Perform one step of the leaf-to-root walk: find the name of *vp in its
  * parent directory, store it in front of what buf already holds, and
  * replace *vp by the parent.
  *
  * buf is filled from the end: *buflen is the offset of the first used byte
  * and is decreased by the length of the name that gets stored.
  *
  * Called with the name cache read lock held and a reference on *vp.
  *   Success (0): the read lock is held again on return, the reference on
  *     the old *vp has been dropped and *vp is the parent, referenced.
  *   Failure: the read lock is NOT held and no vnode reference is left.
  */
 static int
 vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
     u_int *buflen)
 {
 	struct vnode *dvp;
 	struct namecache *ncp;
 	int error;
 
 	/* Pick the first cache entry naming *vp that is not a ".." entry. */
 	TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 	}
 	if (ncp != NULL) {
 		/* Cache hit: the name has to fit in front of the data. */
 		if (*buflen < ncp->nc_nlen) {
 			CACHE_RUNLOCK();
 			vrele(*vp);
 			numfullpathfail4++;
 			error = ENOMEM;
 			SDT_PROBE3(vfs, namecache, fullpath, return, error,
 			    vp, NULL);
 			return (error);
 		}
 		*buflen -= ncp->nc_nlen;
 		memcpy(buf + *buflen, nc_get_name(ncp), ncp->nc_nlen);
 		SDT_PROBE3(vfs, namecache, fullpath, hit, ncp->nc_dvp,
 		    nc_get_name(ncp), vp);
 		/*
 		 * Switch to the parent.  The parent is referenced before the
 		 * lock is dropped; the lock is released around vrele() of the
 		 * child and then taken again for the caller.
 		 */
 		dvp = *vp;
 		*vp = ncp->nc_dvp;
 		vref(*vp);
 		CACHE_RUNLOCK();
 		vrele(dvp);
 		CACHE_RLOCK();
 		return (0);
 	}
 	SDT_PROBE1(vfs, namecache, fullpath, miss, vp);
 
 	/*
 	 * Cache miss: ask the filesystem via VOP_VPTOCNP() with the vnode
 	 * share-locked and the cache lock released.  vput() then unlocks
 	 * *vp and gives up our reference on it.
 	 */
 	CACHE_RUNLOCK();
 	vn_lock(*vp, LK_SHARED | LK_RETRY);
 	error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen);
 	vput(*vp);
 	if (error) {
 		numfullpathfail2++;
 		SDT_PROBE3(vfs, namecache, fullpath, return,  error, vp, NULL);
 		return (error);
 	}
 
 	*vp = dvp;
 	CACHE_RLOCK();
 	if (dvp->v_iflag & VI_DOOMED) {
 		/* forced unmount */
 		CACHE_RUNLOCK();
 		vrele(dvp);
 		error = ENOENT;
 		SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL);
 		return (error);
 	}
 	/*
 	 * *vp has its use count incremented still.
 	 */
 
 	return (0);
 }
 
 /*
  * The magic behind kern___getcwd() and vn_fullpath().
  *
  * Builds the path of vp backwards from the end of buf (buflen bytes):
  * the last byte is set to NUL, then name components and '/' separators
  * are stored in front of it while walking from vp towards rdir (or
  * rootvnode, whichever is met first).  On success 0 is returned and
  * *retbuf points at the first character of the path inside buf.
  *
  * A reference on vp is taken for the duration of the walk; the caller's
  * own reference is not consumed.  Every return path leaves the name cache
  * read lock released.
  */
 static int
 vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
     char *buf, char **retbuf, u_int buflen)
 {
 	int error, slash_prefixed;
 #ifdef KDTRACE_HOOKS
 	struct vnode *startvp = vp;
 #endif
 	struct vnode *vp1;
 
 	/* Reserve the final byte for the terminator. */
 	buflen--;
 	buf[buflen] = '\0';
 	error = 0;
 	slash_prefixed = 0;
 
 	SDT_PROBE1(vfs, namecache, fullpath, entry, vp);
 	numfullpathcalls++;
 	vref(vp);
 	CACHE_RLOCK();
 	if (vp->v_type != VDIR) {
 		/*
 		 * Not a directory: resolve its own name first and continue
 		 * from the parent.  When the helper fails it has already
 		 * dropped both the lock and the vnode reference.
 		 */
 		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
 		if (error)
 			return (error);
 		/*
 		 * NOTE(review): unlike the ENOMEM exit after the loop, this
 		 * one neither bumps numfullpathfail4 nor fires the return
 		 * probe.
 		 */
 		if (buflen == 0) {
 			CACHE_RUNLOCK();
 			vrele(vp);
 			return (ENOMEM);
 		}
 		buf[--buflen] = '/';
 		slash_prefixed = 1;
 	}
 	while (vp != rdir && vp != rootvnode) {
 		if (vp->v_vflag & VV_ROOT) {
 			if (vp->v_iflag & VI_DOOMED) {	/* forced unmount */
 				CACHE_RUNLOCK();
 				vrele(vp);
 				error = ENOENT;
 				SDT_PROBE3(vfs, namecache, fullpath, return,
 				    error, vp, NULL);
 				break;
 			}
 			/*
 			 * Root of a mounted filesystem: continue from the
 			 * vnode the mount covers.  No name is emitted for
 			 * this step.
 			 */
 			vp1 = vp->v_mount->mnt_vnodecovered;
 			vref(vp1);
 			CACHE_RUNLOCK();
 			vrele(vp);
 			vp = vp1;
 			CACHE_RLOCK();
 			continue;
 		}
 		if (vp->v_type != VDIR) {
 			CACHE_RUNLOCK();
 			vrele(vp);
 			numfullpathfail1++;
 			error = ENOTDIR;
 			SDT_PROBE3(vfs, namecache, fullpath, return,
 			    error, vp, NULL);
 			break;
 		}
 		/* On failure the helper has released lock and reference. */
 		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
 		if (error)
 			break;
 		/* NOTE(review): numfullpathfail4 is not bumped here either. */
 		if (buflen == 0) {
 			CACHE_RUNLOCK();
 			vrele(vp);
 			error = ENOMEM;
 			SDT_PROBE3(vfs, namecache, fullpath, return, error,
 			    startvp, NULL);
 			break;
 		}
 		buf[--buflen] = '/';
 		slash_prefixed = 1;
 	}
 	/* Every error exit above already dropped the lock and reference. */
 	if (error)
 		return (error);
 	if (!slash_prefixed) {
 		/* Nothing was emitted at all: the result is just "/". */
 		if (buflen == 0) {
 			CACHE_RUNLOCK();
 			vrele(vp);
 			numfullpathfail4++;
 			SDT_PROBE3(vfs, namecache, fullpath, return, ENOMEM,
 			    startvp, NULL);
 			return (ENOMEM);
 		}
 		buf[--buflen] = '/';
 	}
 	numfullpathfound++;
 	CACHE_RUNLOCK();
 	vrele(vp);
 
 	SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, buf + buflen);
 	*retbuf = buf + buflen;
 	return (0);
 }
 
 struct vnode *
 vn_dir_dd_ino(struct vnode *vp)
 {
 	struct namecache *ncp;
 	struct vnode *ddvp;
 
 	ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino");
 	CACHE_RLOCK();
 	TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 			continue;
 		ddvp = ncp->nc_dvp;
 		vhold(ddvp);
 		CACHE_RUNLOCK();
 		if (vget(ddvp, LK_SHARED | LK_NOWAIT | LK_VNHELD, curthread))
 			return (NULL);
 		return (ddvp);
 	}
 	CACHE_RUNLOCK();
 	return (NULL);
 }
 
 /*
  * Copy the name under which vp is cached (taken from its first name cache
  * entry that is not a ".." entry) into buf, truncated to buflen - 1 bytes
  * and always NUL-terminated.
  *
  * Returns 0 on success, ENOENT when no such cache entry exists and EINVAL
  * when buflen is 0, i.e. when there is no room even for the terminator.
  */
 int
 vn_commname(struct vnode *vp, char *buf, u_int buflen)
 {
 	struct namecache *ncp;
 	int l;
 
 	/*
 	 * buflen is unsigned: with buflen == 0 the "buflen - 1" below would
 	 * wrap around, the clamp would be a no-op and buf would be overrun.
 	 */
 	if (buflen == 0)
 		return (EINVAL);
 
 	CACHE_RLOCK();
 	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 	if (ncp == NULL) {
 		CACHE_RUNLOCK();
 		return (ENOENT);
 	}
 	/* Leave one byte for the terminator. */
 	l = min(ncp->nc_nlen, buflen - 1);
 	memcpy(buf, nc_get_name(ncp), l);
 	CACHE_RUNLOCK();
 	buf[l] = '\0';
 	return (0);
 }
 
 /*
  * ABI compat shims for old kernel modules.
  *
  * The #undef removes any macro called cache_enter so that a real function
  * of that name can be declared and defined here.  The function simply
  * forwards to cache_enter_time() with NULL for its two trailing arguments.
  */
 #undef cache_enter
 
 void	cache_enter(struct vnode *dvp, struct vnode *vp,
 	    struct componentname *cnp);
 
 void
 cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 {
 
 	cache_enter_time(dvp, vp, cnp, NULL, NULL);
 }
 
 /*
  * This function updates the path string to the vnode's full global path
  * and checks the size of the new path string against the pathlen argument.
  *
  * Requires a locked, referenced vnode.
  * Vnode is re-locked on success or ENODEV, otherwise unlocked.
  *
  * If sysctl debug.disablefullpath is set, ENODEV is returned,
  * the vnode is left locked and path remains untouched.
  *
  * If vp is a directory, the call to vn_fullpath_global() always succeeds
  * because it falls back to the ".." lookup if the namecache lookup fails.
  *
  * On every failure other than ENODEV the caller's reference on vp is
  * released here.
  */
 int
 vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path,
     u_int pathlen)
 {
 	struct nameidata nd;
 	struct vnode *vp1;
 	char *rpath, *fbuf;
 	int error;
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
 
 	/* Return ENODEV if sysctl debug.disablefullpath==1 */
 	if (disablefullpath)
 		return (ENODEV);
 
 	/* Construct global filesystem path from vp. */
 	VOP_UNLOCK(vp, 0);
 	error = vn_fullpath_global(td, vp, &rpath, &fbuf);
 
 	if (error != 0) {
 		/* fbuf was not handed out on failure, so nothing to free. */
 		vrele(vp);
 		return (error);
 	}
 
 	/* The new path plus its terminator has to fit into pathlen bytes. */
 	if (strlen(rpath) >= pathlen) {
 		vrele(vp);
 		error = ENAMETOOLONG;
 		goto out;
 	}
 
 	/*
 	 * Re-lookup the vnode by path to detect a possible rename.
 	 * As a side effect, the vnode is relocked.
 	 * If vnode was renamed, return ENOENT.
 	 */
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
 	    UIO_SYSSPACE, path, td);
 	error = namei(&nd);
 	if (error != 0) {
 		vrele(vp);
 		goto out;
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp1 = nd.ni_vp;
 	/*
 	 * The caller's original reference is dropped in both cases; when the
 	 * lookup found the same vnode, the lock and reference obtained by
 	 * namei() take its place.
 	 */
 	vrele(vp);
 	if (vp1 == vp)
 		strcpy(path, rpath);
 	else {
 		vput(vp1);
 		error = ENOENT;
 	}
 
 out:
 	/* rpath points into fbuf, so it is freed only after the copy. */
 	free(fbuf, M_TEMP);
 	return (error);
 }
Index: projects/release-pkg/sys/modules/geom/geom_eli/Makefile
===================================================================
--- projects/release-pkg/sys/modules/geom/geom_eli/Makefile	(revision 293335)
+++ projects/release-pkg/sys/modules/geom/geom_eli/Makefile	(revision 293336)
@@ -1,16 +1,17 @@
 # $FreeBSD$
 
 .PATH: ${.CURDIR}/../../../geom/eli
 
 KMOD=	geom_eli
 SRCS=	g_eli.c
 SRCS+=	g_eli_crypto.c
 SRCS+=	g_eli_ctl.c
+SRCS+=	g_eli_hmac.c
 SRCS+=	g_eli_integrity.c
 SRCS+=	g_eli_key.c
 SRCS+=	g_eli_key_cache.c
 SRCS+=	g_eli_privacy.c
 SRCS+=	pkcs5v2.c
 SRCS+=	vnode_if.h
 
 .include <bsd.kmod.mk>
Index: projects/release-pkg/sys/netgraph/netflow/netflow.c
===================================================================
--- projects/release-pkg/sys/netgraph/netflow/netflow.c	(revision 293335)
+++ projects/release-pkg/sys/netgraph/netflow/netflow.c	(revision 293336)
@@ -1,1195 +1,1189 @@
 /*-
  * Copyright (c) 2010-2011 Alexander V. Chernikov <melifaro@ipfw.ru>
  * Copyright (c) 2004-2005 Gleb Smirnoff <glebius@FreeBSD.org>
  * Copyright (c) 2001-2003 Roman V. Palagin <romanp@unshadow.net>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $SourceForge: netflow.c,v 1.41 2004/09/05 11:41:10 glebius Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet6.h"
 #include "opt_route.h"
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/counter.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/mbuf.h>
 #include <sys/syslog.h>
 #include <sys/socket.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/ethernet.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip6.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 
 #include <netgraph/netflow/netflow.h>
 #include <netgraph/netflow/netflow_v9.h>
 #include <netgraph/netflow/ng_netflow.h>
 
 #define	NBUCKETS	(65536)		/* must be power of 2 */
 
 /*
  * This hash is for TCP or UDP packets.
  *
  * Folds both addresses and both ports into a bucket index in the range
  * [0, NBUCKETS).  Every argument is parenthesized so that compound
  * expressions can be passed; addr1 and addr2 are each evaluated twice, so
  * arguments must not have side effects.
  */
 #define FULL_HASH(addr1, addr2, port1, port2)	\
 	((((addr1) ^ ((addr1) >> 16) ^		\
 	htons((addr2) ^ ((addr2) >> 16))) ^	\
 	(port1) ^ htons(port2)) &		\
 	(NBUCKETS - 1))
 
 /*
  * This hash is for all other IP packets.
  *
  * Same as FULL_HASH() without the port terms; the same caveat about
  * double evaluation of the arguments applies.
  */
 #define ADDR_HASH(addr1, addr2)			\
 	(((addr1) ^ ((addr1) >> 16) ^		\
 	htons((addr2) ^ ((addr2) >> 16))) &	\
 	(NBUCKETS - 1))
 
 /*
  * Macros to shorten logical constructions.  The argument is a pointer to a
  * flow entry and is parenthesized so that any pointer expression works.
  */
 /* XXX: priv must exist in namespace */
 #define	INACTIVE(fle)	(time_uptime - (fle)->f.last > priv->nfinfo_inact_t)
 #define	AGED(fle)	(time_uptime - (fle)->f.first > priv->nfinfo_act_t)
 #define	ISFREE(fle)	((fle)->f.packets == 0)
 
 /*
  * 4 is a magical number: statistically number of 4-packet flows is
  * bigger than 5,6,7...-packet flows by an order of magnitude. Most UDP/ICMP
  * scans are 1 packet (~ 90% of flow cache). TCP scans are 2-packet in case
  * of reachable host and 4-packet otherwise.
  */
 #define	SMALL(fle)	((fle)->f.packets <= 4)
 
 MALLOC_DEFINE(M_NETFLOW_HASH, "netflow_hash", "NetFlow hash");
 
 static int export_add(item_p, struct flow_entry *);
 static int export_send(priv_p, fib_export_p, item_p, int);
 
 static int hash_insert(priv_p, struct flow_hash_entry *, struct flow_rec *,
     int, uint8_t, uint8_t);
 #ifdef INET6
 static int hash6_insert(priv_p, struct flow_hash_entry *, struct flow6_rec *,
     int, uint8_t, uint8_t);
 #endif
 
 static void expire_flow(priv_p, fib_export_p, struct flow_entry *, int);
 
 /*
  * Generate hash for a given flow record.
  *
  * TCP and UDP flows are hashed over addresses and ports, everything else
  * over the addresses only.
  *
  * The FIB is deliberately left out of the hash: most VRFs carry public
  * IPv4 addresses, which are unique even without looking at the FIB.
  * Private addresses can overlap, but that is resolved by the bcmp() of the
  * flow_rec, which contains the fib id.  In the IPv6 world addresses are all
  * globally unique (not fully true, there is FC00::/7 for example, but the
  * chances of an address overlap are MUCH smaller).
  */
 static inline uint32_t
 ip_hash(struct flow_rec *r)
 {
 
 	if (r->r_ip_p == IPPROTO_TCP || r->r_ip_p == IPPROTO_UDP)
 		return (FULL_HASH(r->r_src.s_addr, r->r_dst.s_addr,
 		    r->r_sport, r->r_dport));
 
 	return (ADDR_HASH(r->r_src.s_addr, r->r_dst.s_addr));
 }
 
 #ifdef INET6
 /*
  * Generate hash for a given flow6 record.  Only the lowest 4 octets of
  * each IPv6 address take part; TCP and UDP flows add the ports.
  */
 static inline uint32_t
 ip6_hash(struct flow6_rec *r)
 {
 
 	if (r->r_ip_p == IPPROTO_TCP || r->r_ip_p == IPPROTO_UDP)
 		return (FULL_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3],
 		    r->dst.r_dst6.__u6_addr.__u6_addr32[3], r->r_sport,
 		    r->r_dport));
 
 	return (ADDR_HASH(r->src.r_src6.__u6_addr.__u6_addr32[3],
 	    r->dst.r_dst6.__u6_addr.__u6_addr32[3]));
 }
 #endif
 
 /*
  * Detach the pending v5 export datagram from the FIB export state, if
  * there is one.  If there is none, allocate a new, empty one.
  * Returns NULL when the allocation fails.
  */
 static item_p
 get_export_dgram(priv_p priv, fib_export_p fe)
 {
 	struct netflow_v5_export_dgram *hdr;
 	struct mbuf *m;
 	item_p item;
 
 	/* Take over whatever is parked on this FIB; the slot ends up empty. */
 	mtx_lock(&fe->export_mtx);
 	item = fe->exp.item;
 	fe->exp.item = NULL;
 	mtx_unlock(&fe->export_mtx);
 	if (item != NULL)
 		return (item);
 
 	/* Nothing was parked: start an empty datagram in a fresh mbuf. */
 	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 	if (m == NULL)
 		return (NULL);
 	item = ng_package_data(m, NG_NOFLAGS);
 	if (item == NULL)
 		return (NULL);
 
 	hdr = mtod(m, struct netflow_v5_export_dgram *);
 	hdr->header.count = 0;
 	hdr->header.version = htons(NETFLOW_V5);
 	hdr->header.pad = 0;
 
 	return (item);
 }
 
 /*
  * Re-attach an incomplete datagram to the FIB export state.
  * If another one is already attached, send the incomplete one instead.
  */
 static void
 return_export_dgram(priv_p priv, fib_export_p fe, item_p item, int flags)
 {
 
 	mtx_lock(&fe->export_mtx);
 	if (fe->exp.item != NULL) {
 		/*
 		 * It may happen on SMP that some other thread has already
 		 * put its item there; in that case we bail out and send
 		 * what we have to the collector.
 		 */
 		mtx_unlock(&fe->export_mtx);
 		export_send(priv, fe, item, flags);
 		return;
 	}
 	fe->exp.item = item;
 	mtx_unlock(&fe->export_mtx);
 }
 
 /*
  * The flow is over. Call export_add() and free it. If datagram is
  * full, then call export_send().
  *
  * The entry is exported as NetFlow v5 when priv->export is set and it is
  * an IPv4 flow, and as NetFlow v9 when priv->export9 is set.  For IPv4 and
  * IPv6 entries fle is always given back to its UMA zone before returning,
  * including the paths where no export datagram could be obtained.
  */
 static void
 expire_flow(priv_p priv, fib_export_p fe, struct flow_entry *fle, int flags)
 {
 	struct netflow_export_item exp;
 	uint16_t version = fle->f.version;
 
 	if ((priv->export != NULL) && (version == IPVERSION)) {
 		exp.item = get_export_dgram(priv, fe);
 		if (exp.item == NULL) {
 			/*
 			 * No datagram: the flow is dropped.  The v9 export
 			 * is not attempted either, so it is counted as
 			 * failed as well when configured.
 			 */
 			priv->nfinfo_export_failed++;
 			if (priv->export9 != NULL)
 				priv->nfinfo_export9_failed++;
 			/* fle definitely contains IPv4 flow. */
 			uma_zfree_arg(priv->zone, fle, priv);
 			return;
 		}
 
 		/* Send when export_add() returns > 0, else park it again. */
 		if (export_add(exp.item, fle) > 0)
 			export_send(priv, fe, exp.item, flags);
 		else
 			return_export_dgram(priv, fe, exp.item, NG_QUEUE);
 	}
 
 	if (priv->export9 != NULL) {
 		exp.item9 = get_export9_dgram(priv, fe, &exp.item9_opt);
 		if (exp.item9 == NULL) {
 			priv->nfinfo_export9_failed++;
 			if (version == IPVERSION)
 				uma_zfree_arg(priv->zone, fle, priv);
 #ifdef INET6
 			else if (version == IP6VERSION)
 				uma_zfree_arg(priv->zone6, fle, priv);
 #endif
 			else
 				panic("ng_netflow: Unknown IP proto: %d",
 				    version);
 			return;
 		}
 
 		if (export9_add(exp.item9, exp.item9_opt, fle) > 0)
 			export9_send(priv, fe, exp.item9, exp.item9_opt, flags);
 		else
 			return_export9_dgram(priv, fe, exp.item9,
 			    exp.item9_opt, NG_QUEUE);
 	}
 
 	/*
 	 * NOTE(review): an unknown version is silently ignored here (fle is
 	 * not freed), while the export9 failure path above panics on it.
 	 */
 	if (version == IPVERSION)
 		uma_zfree_arg(priv->zone, fle, priv);
 #ifdef INET6
 	else if (version == IP6VERSION)
 		uma_zfree_arg(priv->zone6, fle, priv);
 #endif
 }
 
 /*
  * Get a snapshot of node statistics.
  *
  * Fills *i from priv.  No lock is taken here, so the individual values
  * are not read as one consistent set.
  */
 void
 ng_netflow_copyinfo(priv_p priv, struct ng_netflow_info *i)
 {
 
 	/* Traffic and expiry counters are counter(9) objects. */
 	i->nfinfo_bytes = counter_u64_fetch(priv->nfinfo_bytes);
 	i->nfinfo_packets = counter_u64_fetch(priv->nfinfo_packets);
 	i->nfinfo_bytes6 = counter_u64_fetch(priv->nfinfo_bytes6);
 	i->nfinfo_packets6 = counter_u64_fetch(priv->nfinfo_packets6);
 	i->nfinfo_sbytes = counter_u64_fetch(priv->nfinfo_sbytes);
 	i->nfinfo_spackets = counter_u64_fetch(priv->nfinfo_spackets);
 	i->nfinfo_sbytes6 = counter_u64_fetch(priv->nfinfo_sbytes6);
 	i->nfinfo_spackets6 = counter_u64_fetch(priv->nfinfo_spackets6);
 	i->nfinfo_act_exp = counter_u64_fetch(priv->nfinfo_act_exp);
 	i->nfinfo_inact_exp = counter_u64_fetch(priv->nfinfo_inact_exp);
 
 	/* Cache occupancy is taken from the UMA zones themselves. */
 	i->nfinfo_used = uma_zone_get_cur(priv->zone);
 #ifdef INET6
 	i->nfinfo_used6 = uma_zone_get_cur(priv->zone6);
 #endif
 
 	/* The remaining fields are plain members of priv. */
 	i->nfinfo_alloc_failed = priv->nfinfo_alloc_failed;
 	i->nfinfo_export_failed = priv->nfinfo_export_failed;
 	i->nfinfo_export9_failed = priv->nfinfo_export9_failed;
 	i->nfinfo_realloc_mbuf = priv->nfinfo_realloc_mbuf;
 	i->nfinfo_alloc_fibs = priv->nfinfo_alloc_fibs;
 	i->nfinfo_inact_t = priv->nfinfo_inact_t;
 	i->nfinfo_act_t = priv->nfinfo_act_t;
 }
 
 /*
  * Insert a record into defined slot.
  *
  * First we get for us a free flow entry, then fill in all
  * possible fields in it.
  *
  * The caller must own hsh->mtx.  Returns 0 on success, or ENOMEM (after
  * bumping nfinfo_alloc_failed) when no flow entry could be allocated.
  * The route lookups are skipped when the corresponding
  * NG_NETFLOW_CONF_NO{DST,SRC}LOOKUP bit is set in flags.
  *
  * TODO: consider dropping hash mutex while filling in datagram,
  * as this was done in previous version. Need to test & profile
  * to be sure.
  */
 static int
 hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r,
 	int plen, uint8_t flags, uint8_t tcp_flags)
 {
 	struct flow_entry *fle;
 	struct sockaddr_in sin;
 	struct rtentry *rt;
 
 	mtx_assert(&hsh->mtx, MA_OWNED);
 
 	fle = uma_zalloc_arg(priv->zone, priv, M_NOWAIT);
 	if (fle == NULL) {
 		priv->nfinfo_alloc_failed++;
 		return (ENOMEM);
 	}
 
 	/*
 	 * Now fle is totally ours. It is detached from all lists,
 	 * we can safely edit it.
 	 */
 	fle->f.version = IPVERSION;
 	bcopy(r, &fle->f.r, sizeof(struct flow_rec));
 	fle->f.bytes = plen;
 	fle->f.packets = 1;
 	fle->f.tcp_flags = tcp_flags;
 
 	fle->f.first = fle->f.last = time_uptime;
 
 	/*
 	 * First we do route table lookup on destination address. So we can
 	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
 	 */
 	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
 		bzero(&sin, sizeof(sin));
 		sin.sin_len = sizeof(struct sockaddr_in);
 		sin.sin_family = AF_INET;
 		sin.sin_addr = fle->f.r.r_dst;
 		rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib);
 		if (rt != NULL) {
 			fle->f.fle_o_ifx = rt->rt_ifp->if_index;
 
 			/* The next hop is recorded for IPv4 gateways only. */
 			if (rt->rt_flags & RTF_GATEWAY &&
 			    rt->rt_gateway->sa_family == AF_INET)
 				fle->f.next_hop =
 				    ((struct sockaddr_in *)(rt->rt_gateway))->sin_addr;
 
 			/* Prefix length = number of bits set in the mask. */
 			if (rt_mask(rt))
 				fle->f.dst_mask =
 				    bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr);
 			else if (rt->rt_flags & RTF_HOST)
 				/* Give up. We can't determine mask :( */
 				fle->f.dst_mask = 32;
 
 			RTFREE_LOCKED(rt);
 		}
 	}
 
 	/* Do route lookup on source address, to fill in src_mask. */
 	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
 		bzero(&sin, sizeof(sin));
 		sin.sin_len = sizeof(struct sockaddr_in);
 		sin.sin_family = AF_INET;
 		sin.sin_addr = fle->f.r.r_src;
 		rt = rtalloc1_fib((struct sockaddr *)&sin, 0, 0, r->fib);
 		if (rt != NULL) {
 			if (rt_mask(rt))
 				fle->f.src_mask =
 				    bitcount32(((struct sockaddr_in *)rt_mask(rt))->sin_addr.s_addr);
 			else if (rt->rt_flags & RTF_HOST)
 				/* Give up. We can't determine mask :( */
 				fle->f.src_mask = 32;
 
 			RTFREE_LOCKED(rt);
 		}
 	}
 
 	/* Push new flow at the end of hash. */
 	TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 
 	return (0);
 }
 
 #ifdef INET6
 /* XXX: make normal function, instead of.. */
 #define ipv6_masklen(x)		bitcount32((x).__u6_addr.__u6_addr32[0]) + \
 				bitcount32((x).__u6_addr.__u6_addr32[1]) + \
 				bitcount32((x).__u6_addr.__u6_addr32[2]) + \
 				bitcount32((x).__u6_addr.__u6_addr32[3])
 #define RT_MASK6(x)	(ipv6_masklen(((struct sockaddr_in6 *)rt_mask(x))->sin6_addr))
 /*
  * IPv6 counterpart of hash_insert(): allocate a flow6 entry from
  * priv->zone6, fill it in from r and append it to the hsh6 chain.
  *
  * The caller must own hsh6->mtx.  Returns 0 on success, or ENOMEM (after
  * bumping nfinfo_alloc_failed) when no entry could be allocated.  The
  * route lookups are skipped when the corresponding
  * NG_NETFLOW_CONF_NO{DST,SRC}LOOKUP bit is set in flags.
  */
 static int
 hash6_insert(priv_p priv, struct flow_hash_entry *hsh6, struct flow6_rec *r,
 	int plen, uint8_t flags, uint8_t tcp_flags)
 {
 	struct flow6_entry *fle6;
-	struct sockaddr_in6 *src, *dst;
+	struct sockaddr_in6 sin6;
 	struct rtentry *rt;
-	struct route_in6 rin6;
 
 	mtx_assert(&hsh6->mtx, MA_OWNED);
 
 	fle6 = uma_zalloc_arg(priv->zone6, priv, M_NOWAIT);
 	if (fle6 == NULL) {
 		priv->nfinfo_alloc_failed++;
 		return (ENOMEM);
 	}
 
 	/*
 	 * Now fle is totally ours. It is detached from all lists,
 	 * we can safely edit it.
 	 */
 
 	fle6->f.version = IP6VERSION;
 	bcopy(r, &fle6->f.r, sizeof(struct flow6_rec));
 	fle6->f.bytes = plen;
 	fle6->f.packets = 1;
 	fle6->f.tcp_flags = tcp_flags;
 
 	fle6->f.first = fle6->f.last = time_uptime;
 
 	/*
 	 * First we do route table lookup on destination address. So we can
 	 * fill in out_ifx, dst_mask, nexthop, and dst_as in future releases.
 	 */
 	if ((flags & NG_NETFLOW_CONF_NODSTLOOKUP) == 0) {
-		bzero(&rin6, sizeof(struct route_in6));
-		dst = (struct sockaddr_in6 *)&rin6.ro_dst;
-		dst->sin6_len = sizeof(struct sockaddr_in6);
-		dst->sin6_family = AF_INET6;
-		dst->sin6_addr = r->dst.r_dst6;
+		bzero(&sin6, sizeof(struct sockaddr_in6));
+		sin6.sin6_len = sizeof(struct sockaddr_in6);
+		sin6.sin6_family = AF_INET6;
+		sin6.sin6_addr = r->dst.r_dst6;
 
-		rin6.ro_rt = rtalloc1_fib((struct sockaddr *)dst, 0, 0, r->fib);
+		rt = rtalloc1_fib((struct sockaddr *)&sin6, 0, 0, r->fib);
 
-		if (rin6.ro_rt != NULL) {
-			rt = rin6.ro_rt;
+		if (rt != NULL) {
 			fle6->f.fle_o_ifx = rt->rt_ifp->if_index;
 
 			/* The next hop is recorded for IPv6 gateways only. */
 			if (rt->rt_flags & RTF_GATEWAY &&
 			    rt->rt_gateway->sa_family == AF_INET6)
 				fle6->f.n.next_hop6 =
 				    ((struct sockaddr_in6 *)(rt->rt_gateway))->sin6_addr;
 
 			/* Without a mask the route is taken to be a /128. */
 			if (rt_mask(rt))
 				fle6->f.dst_mask = RT_MASK6(rt);
 			else
 				fle6->f.dst_mask = 128;
 
 			RTFREE_LOCKED(rt);
 		}
 	}
 
 	if ((flags & NG_NETFLOW_CONF_NOSRCLOOKUP) == 0) {
 		/* Do route lookup on source address, to fill in src_mask. */
-		bzero(&rin6, sizeof(struct route_in6));
-		src = (struct sockaddr_in6 *)&rin6.ro_dst;
-		src->sin6_len = sizeof(struct sockaddr_in6);
-		src->sin6_family = AF_INET6;
-		src->sin6_addr = r->src.r_src6;
+		bzero(&sin6, sizeof(struct sockaddr_in6));
+		sin6.sin6_len = sizeof(struct sockaddr_in6);
+		sin6.sin6_family = AF_INET6;
+		sin6.sin6_addr = r->src.r_src6;
 
-		rin6.ro_rt = rtalloc1_fib((struct sockaddr *)src, 0, 0, r->fib);
+		rt = rtalloc1_fib((struct sockaddr *)&sin6, 0, 0, r->fib);
 
-		if (rin6.ro_rt != NULL) {
-			rt = rin6.ro_rt;
-
+		if (rt != NULL) {
 			if (rt_mask(rt))
 				fle6->f.src_mask = RT_MASK6(rt);
 			else
 				fle6->f.src_mask = 128;
 
 			RTFREE_LOCKED(rt);
 		}
 	}
 
 	/* Push new flow at the end of hash. */
 	TAILQ_INSERT_TAIL(&hsh6->head, (struct flow_entry *)fle6, fle_hash);
 
 	return (0);
 }
 #undef ipv6_masklen
 #undef RT_MASK6
 #endif
 
 
 /*
  * Non-static functions called from ng_netflow.c
  */
 
 /*
  * Allocate memory and set up flow cache.
  *
  * Creates the UMA zone(s) for flow entries (each capped at CACHESIZE
  * items), allocates and initializes the NBUCKETS-entry hash table(s),
  * allocates the statistics counters and finally initializes the NetFlow
  * v9 part.  All allocations here use M_WAITOK.
  */
 void
 ng_netflow_cache_init(priv_p priv)
 {
 	struct flow_hash_entry *hsh;
 	int i;
 
 	/* Initialize cache UMA zone. */
 	priv->zone = uma_zcreate("NetFlow IPv4 cache",
 	    sizeof(struct flow_entry), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(priv->zone, CACHESIZE);
 #ifdef INET6	
 	priv->zone6 = uma_zcreate("NetFlow IPv6 cache",
 	    sizeof(struct flow6_entry), NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(priv->zone6, CACHESIZE);
 #endif	
 
 	/* Allocate hash. */
 	priv->hash = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
 	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);
 
 	/* Initialize hash: every bucket has its own mutex and list head. */
 	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++) {
 		mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
 		TAILQ_INIT(&hsh->head);
 	}
 
 #ifdef INET6
 	/* Allocate hash. */
 	priv->hash6 = malloc(NBUCKETS * sizeof(struct flow_hash_entry),
 	    M_NETFLOW_HASH, M_WAITOK | M_ZERO);
 
 	/* Initialize hash. */
 	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++) {
 		mtx_init(&hsh->mtx, "hash mutex", NULL, MTX_DEF);
 		TAILQ_INIT(&hsh->head);
 	}
 #endif
 
 	/* The IPv6 counters are allocated regardless of INET6. */
 	priv->nfinfo_bytes = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_packets = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_bytes6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_packets6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_sbytes = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_spackets = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_sbytes6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_spackets6 = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_act_exp = counter_u64_alloc(M_WAITOK);
 	priv->nfinfo_inact_exp = counter_u64_alloc(M_WAITOK);
 
 	ng_netflow_v9_cache_init(priv);
 	CTR0(KTR_NET, "ng_netflow startup()");
 }
 
 /*
  * Initialize new FIB table for v5 and v9.
  *
  * Does nothing when export state for the FIB already exists.  Otherwise a
  * zeroed fib_export is allocated without sleeping and published with an
  * atomic compare-and-set on priv->fib_data[fib]; when the slot turns out
  * to be taken already, the fresh allocation is torn down again.
  * Returns 0, or ENOMEM when the allocation fails.
  */
 int
 ng_netflow_fib_init(priv_p priv, int fib)
 {
 	fib_export_p	fe;
 
 	fe = priv_to_fib(priv, fib);
 
 	CTR1(KTR_NET, "ng_netflow(): fib init: %d", fib);
 
 	if (fe != NULL)
 		return (0);
 
 	fe = malloc(sizeof(struct fib_export), M_NETGRAPH, M_NOWAIT | M_ZERO);
 	if (fe == NULL)
 		return (ENOMEM);
 
 	mtx_init(&fe->export_mtx, "export dgram lock", NULL, MTX_DEF);
 	mtx_init(&fe->export9_mtx, "export9 dgram lock", NULL, MTX_DEF);
 	fe->fib = fib;
 	fe->domain_id = fib;
 
 	if (atomic_cmpset_ptr((volatile uintptr_t *)&priv->fib_data[fib],
 	    (uintptr_t)NULL, (uintptr_t)fe) != 0) {
 		/* Increase counter for statistics */
 		CTR3(KTR_NET, "ng_netflow(): fib %d setup to %p (%p)",
 		    fib, fe, priv_to_fib(priv, fib));
 		priv->nfinfo_alloc_fibs++;
 		return (0);
 	}
 
 	/* FIB already set up by other ISR */
 	CTR3(KTR_NET, "ng_netflow(): fib init: %d setup %p but got %p",
 	    fib, fe, priv_to_fib(priv, fib));
 	mtx_destroy(&fe->export_mtx);
 	mtx_destroy(&fe->export9_mtx);
 	free(fe, M_NETGRAPH);
 
 	return (0);
 }
 
 /*
  * Free all flow cache memory. Called from node close method.
  *
  * Order of operations: every cached flow is expired (and thereby exported
  * and freed), the UMA zones, bucket mutexes and hash tables are destroyed,
  * pending export datagrams of each FIB are sent and the per-FIB state is
  * freed, the statistics counters are released and finally the NetFlow v9
  * part is flushed.
  */
 void
 ng_netflow_cache_flush(priv_p priv)
 {
 	struct flow_entry	*fle, *fle1;
 	struct flow_hash_entry	*hsh;
 	fib_export_p fe;
 	int i;
 
 	/*
 	 * We are going to free probably billable data.
 	 * Expire everything before freeing it.
 	 * No locking is required since callout is already drained.
 	 */
 	for (hsh = priv->hash, i = 0; i < NBUCKETS; hsh++, i++) {
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			fe = priv_to_fib(priv, fle->f.r.fib);
 			expire_flow(priv, fe, fle, NG_QUEUE);
 		}
 	}
 #ifdef INET6
 	for (hsh = priv->hash6, i = 0; i < NBUCKETS; hsh++, i++) {
 		TAILQ_FOREACH_SAFE(fle, &hsh->head, fle_hash, fle1) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			fe = priv_to_fib(priv, fle->f.r.fib);
 			expire_flow(priv, fe, fle, NG_QUEUE);
 		}
 	}
 #endif
 
 	uma_zdestroy(priv->zone);
 	/* Destroy hash mutexes. */
 	for (i = 0, hsh = priv->hash; i < NBUCKETS; i++, hsh++)
 		mtx_destroy(&hsh->mtx);
 
 	/* Free hash memory. */
 	if (priv->hash != NULL)
 		free(priv->hash, M_NETFLOW_HASH);
 #ifdef INET6
 	uma_zdestroy(priv->zone6);
 	/* Destroy hash mutexes. */
 	for (i = 0, hsh = priv->hash6; i < NBUCKETS; i++, hsh++)
 		mtx_destroy(&hsh->mtx);
 
 	/* Free hash memory. */
 	if (priv->hash6 != NULL)
 		free(priv->hash6, M_NETFLOW_HASH);
 #endif
 
 	/* Push out whatever is still parked on each FIB, then free it. */
 	for (i = 0; i < priv->maxfibs; i++) {
 		if ((fe = priv_to_fib(priv, i)) == NULL)
 			continue;
 
 		if (fe->exp.item != NULL)
 			export_send(priv, fe, fe->exp.item, NG_QUEUE);
 
 		if (fe->exp.item9 != NULL)
 			export9_send(priv, fe, fe->exp.item9,
 			    fe->exp.item9_opt, NG_QUEUE);
 
 		mtx_destroy(&fe->export_mtx);
 		mtx_destroy(&fe->export9_mtx);
 		free(fe, M_NETGRAPH);
 	}
 
 	counter_u64_free(priv->nfinfo_bytes);
 	counter_u64_free(priv->nfinfo_packets);
 	counter_u64_free(priv->nfinfo_bytes6);
 	counter_u64_free(priv->nfinfo_packets6);
 	counter_u64_free(priv->nfinfo_sbytes);
 	counter_u64_free(priv->nfinfo_spackets);
 	counter_u64_free(priv->nfinfo_sbytes6);
 	counter_u64_free(priv->nfinfo_spackets6);
 	counter_u64_free(priv->nfinfo_act_exp);
 	counter_u64_free(priv->nfinfo_inact_exp);
 
 	ng_netflow_v9_cache_flush(priv);
 }
 
 /*
  * Insert an IPv4 packet into the flow cache.
  *
  * A flow_rec key is built from the IP header (source/destination address,
  * protocol, TOS), the FIB number taken from 'fe' and 'src_if_index'.  For
  * packets with a zero fragment offset the TCP ports and flags, or the UDP
  * port pair, are added to the key.  The packet/byte counters are bumped,
  * and then, with the hash bucket mutex held, the matching entry is updated
  * or hash_insert() is called to create a new one.
  *
  * 'upper_ptr' and 'upper_proto' are not referenced by this function.
  *
  * Returns EINVAL when ip_v is not IPVERSION or the header length is smaller
  * than sizeof(struct ip); otherwise 0 or the result of hash_insert().
  */
 int
 ng_netflow_flow_add(priv_p priv, fib_export_p fe, struct ip *ip,
     caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
     unsigned int src_if_index)
 {
 	struct flow_entry	*fle, *fle1;
 	struct flow_hash_entry	*hsh;
 	struct flow_rec		r;
 	int			hlen, plen;
 	int			error = 0;
 	uint16_t		eproto;
 	uint8_t			tcp_flags = 0;
 
 	/* The record is compared with bcmp() below, so clear every byte. */
 	bzero(&r, sizeof(r));
 
 	if (ip->ip_v != IPVERSION)
 		return (EINVAL);
 
 	/* ip_hl counts 32-bit words. */
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip))
 		return (EINVAL);
 
 	eproto = ETHERTYPE_IP;
 	/* Assume L4 template by default */
 	r.flow_type = NETFLOW_V9_FLOW_V4_L4;
 
 	r.r_src = ip->ip_src;
 	r.r_dst = ip->ip_dst;
 	r.fib = fe->fib;
 
 	plen = ntohs(ip->ip_len);
 
 	r.r_ip_p = ip->ip_p;
 	r.r_tos = ip->ip_tos;
 
 	r.r_i_ifx = src_if_index;
 
 	/*
 	 * XXX NOTE: only the first fragment of a fragmented TCP, UDP or
 	 * ICMP packet will be recorded with proper s_port and d_port.
 	 * Following fragments will be recorded simply as IP packets with
 	 * ip_proto = ip->ip_p and s_port, d_port set to zero.
 	 * I know, it looks like a bug. But I don't want to re-implement
 	 * IP packet reassembly here. Anyway, (in)famous trafd works this
 	 * way - and nobody complains yet :)
 	 */
 	if ((ip->ip_off & htons(IP_OFFMASK)) == 0)
 		switch(r.r_ip_p) {
 		case IPPROTO_TCP:
 		    {
 			struct tcphdr *tcp;
 
 			tcp = (struct tcphdr *)((caddr_t )ip + hlen);
 			r.r_sport = tcp->th_sport;
 			r.r_dport = tcp->th_dport;
 			tcp_flags = tcp->th_flags;
 			break;
 		    }
 		case IPPROTO_UDP:
 			/* Copies both 16-bit ports with one 32-bit load. */
 			r.r_ports = *(uint32_t *)((caddr_t )ip + hlen);
 			break;
 		}
 
 	counter_u64_add(priv->nfinfo_packets, 1);
 	counter_u64_add(priv->nfinfo_bytes, plen);
 
 	/* Find hash slot. */
 	hsh = &priv->hash[ip_hash(&r)];
 
 	mtx_lock(&hsh->mtx);
 
 	/*
 	 * Go through the hash bucket and find our entry. If we encounter
 	 * an entry that should be expired, purge it. The search runs in
 	 * reverse because an entry is moved to the tail whenever it is
 	 * updated (see below), so recently used entries are met first.
 	 */
 	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
 		if (bcmp(&r, &fle->f.r, sizeof(struct flow_rec)) == 0)
 			break;
 		if ((INACTIVE(fle) && SMALL(fle)) || AGED(fle)) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
 			    fle, NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		}
 	}
 
 	/* fle is NULL here when the loop ran to completion without a match. */
 	if (fle) {			/* An existent entry. */
 
 		fle->f.bytes += plen;
 		fle->f.packets ++;
 		fle->f.tcp_flags |= tcp_flags;
 		fle->f.last = time_uptime;
 
 		/*
 		 * We have the following reasons to expire flow in active way:
 		 * - it hit active timeout
 		 * - a TCP connection closed
 		 * - it is going to overflow counter
 		 */
 		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle) ||
 		    (fle->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib),
 			    fle, NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		} else {
 			/*
 			 * It is the newest, move it to the tail,
 			 * if it isn't there already. Next search will
 			 * locate it quicker.
 			 */
 			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 			}
 		}
 	} else				/* A new flow entry. */
 		error = hash_insert(priv, hsh, &r, plen, flags, tcp_flags);
 
 	mtx_unlock(&hsh->mtx);
 
 	return (error);
 }
 
 #ifdef INET6
 /*
  * Insert an IPv6 packet into the flow cache.
  *
  * A flow6_rec key is built from the IPv6 source/destination address, the
  * FIB number taken from 'fe', 'upper_proto' and 'src_if_index'.  Unless
  * 'flags' has NG_NETFLOW_IS_FRAG set, the first 32 bits at 'upper_ptr' are
  * stored as the port pair for TCP, UDP and SCTP, and the TCP flags are
  * picked up for TCP.  The IPv6 packet/byte counters are bumped, and then,
  * with the hash bucket mutex held, the matching entry is updated or
  * hash6_insert() is called to create a new one.
  *
  * Returns EINVAL when the version bits of ip6_vfc are not IPV6_VERSION;
  * otherwise 0 or the result of hash6_insert().
  */
 int
 ng_netflow_flow6_add(priv_p priv, fib_export_p fe, struct ip6_hdr *ip6,
     caddr_t upper_ptr, uint8_t upper_proto, uint8_t flags,
     unsigned int src_if_index)
 {
 	struct flow_entry	*fle = NULL, *fle1;
 	struct flow6_entry	*fle6;
 	struct flow_hash_entry	*hsh;
 	struct flow6_rec	r;
 	int			plen;
 	int			error = 0;
 	uint8_t			tcp_flags = 0;
 
 	/* check version */
 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
 		return (EINVAL);
 
 	/* The record is compared with bcmp() below, so clear every byte. */
 	bzero(&r, sizeof(r));
 
 	r.src.r_src6 = ip6->ip6_src;
 	r.dst.r_dst6 = ip6->ip6_dst;
 	r.fib = fe->fib;
 
 	/* Assume L4 template by default */
 	r.flow_type = NETFLOW_V9_FLOW_V6_L4;
 
 	/* ip6_plen does not include the fixed header; add it back. */
 	plen = ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr);
 
 #if 0
 	/* XXX: set DSCP/CoS value */
 	r.r_tos = ip->ip_tos;
 #endif
 	if ((flags & NG_NETFLOW_IS_FRAG) == 0) {
 		switch(upper_proto) {
 		case IPPROTO_TCP:
 		    {
 			struct tcphdr *tcp;
 
 			tcp = (struct tcphdr *)upper_ptr;
 			r.r_ports = *(uint32_t *)upper_ptr;
 			tcp_flags = tcp->th_flags;
 			break;
 		    }
  		case IPPROTO_UDP:
 		case IPPROTO_SCTP:
 			/* Copies both 16-bit ports with one 32-bit load. */
 			r.r_ports = *(uint32_t *)upper_ptr;
 			break;
 		}
 	}	
 
 	r.r_ip_p = upper_proto;
 	r.r_i_ifx = src_if_index;
  
 	counter_u64_add(priv->nfinfo_packets6, 1);
 	counter_u64_add(priv->nfinfo_bytes6, plen);
 
 	/* Find hash slot. */
 	hsh = &priv->hash6[ip6_hash(&r)];
 
 	mtx_lock(&hsh->mtx);
 
 	/*
 	 * Go through the hash bucket and find our entry. If we encounter
 	 * an entry that should be expired, purge it. The search runs in
 	 * reverse because an entry is moved to the tail whenever it is
 	 * updated (see below), so recently used entries are met first.
 	 */
 	TAILQ_FOREACH_REVERSE_SAFE(fle, &hsh->head, fhead, fle_hash, fle1) {
 		if (fle->f.version != IP6VERSION)
 			continue;
 		/* List links are typed as flow_entry; view it as IPv6. */
 		fle6 = (struct flow6_entry *)fle;
 		if (bcmp(&r, &fle6->f.r, sizeof(struct flow6_rec)) == 0)
 			break;
 		if ((INACTIVE(fle6) && SMALL(fle6)) || AGED(fle6)) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
 			    NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		}
 	}
 
 	/* fle is NULL here when the loop ran to completion without a match. */
 	if (fle != NULL) {			/* An existent entry. */
 		fle6 = (struct flow6_entry *)fle;
 
 		fle6->f.bytes += plen;
 		fle6->f.packets ++;
 		fle6->f.tcp_flags |= tcp_flags;
 		fle6->f.last = time_uptime;
 
 		/*
 		 * We have the following reasons to expire flow in active way:
 		 * - it hit active timeout
 		 * - a TCP connection closed
 		 * - it is going to overflow counter
 		 */
 		if (tcp_flags & TH_FIN || tcp_flags & TH_RST || AGED(fle6) ||
 		    (fle6->f.bytes >= (CNTR_MAX - IF_MAXMTU)) ) {
 			TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, fle->f.r.fib), fle,
 			    NG_QUEUE);
 			counter_u64_add(priv->nfinfo_act_exp, 1);
 		} else {
 			/*
 			 * It is the newest, move it to the tail,
 			 * if it isn't there already. Next search will
 			 * locate it quicker.
 			 */
 			if (fle != TAILQ_LAST(&hsh->head, fhead)) {
 				TAILQ_REMOVE(&hsh->head, fle, fle_hash);
 				TAILQ_INSERT_TAIL(&hsh->head, fle, fle_hash);
 			}
 		}
 	} else				/* A new flow entry. */
 		error = hash6_insert(priv, hsh, &r, plen, flags, tcp_flags);
 
 	mtx_unlock(&hsh->mtx);
 
 	return (error);
 }
 #endif
 
 /*
  * Return records from cache to userland.
  *
  * 'req' carries the address family (version 4, or 6 when built with INET6)
  * and the position to resume from: hash_id selects the bucket and list_id,
  * when non-zero, the 1-based position inside that bucket.  Records are
  * copied into the buffer that immediately follows 'resp'; resp->nentries
  * is the number copied and resp->hash_id/list_id tell the caller where to
  * continue (both zero once the whole cache has been walked).
  *
  * Note that req->list_id is cleared as a side effect.
  *
  * Returns EINVAL for an out-of-range hash_id or an unsupported version,
  * 0 otherwise.
  *
  * TODO: matching particular IP should be done in kernel, here.
  */
 int
 ng_netflow_flow_show(priv_p priv, struct ngnf_show_header *req,
     struct ngnf_show_header *resp)
 {
 	struct flow_hash_entry	*hsh;
 	struct flow_entry	*fle;
 	struct flow_entry_data	*data = (struct flow_entry_data *)(resp + 1);
 #ifdef INET6
 	struct flow6_entry_data	*data6 = (struct flow6_entry_data *)(resp + 1);
 #endif
 	int	i, max;
 
 	/*
 	 * The bucket index is supplied by the caller.  It is held in a
 	 * signed int, so reject negative values as well as too large ones;
 	 * otherwise the pointer arithmetic below would step in front of
 	 * the hash array.
 	 */
 	i = req->hash_id;
 	if (i < 0 || i >= NBUCKETS)
 		return (EINVAL);
 
 #ifdef INET6
 	if (req->version == 6) {
 		resp->version = 6;
 		hsh = priv->hash6 + i;
 		max = NREC6_AT_ONCE;
 	} else
 #endif
 	if (req->version == 4) {
 		resp->version = 4;
 		hsh = priv->hash + i;
 		max = NREC_AT_ONCE;
 	} else
 		return (EINVAL);
 
 	/*
 	 * We will transfer not more than NREC_AT_ONCE. More data
 	 * will come in next message.
 	 * We send current hash index and current record number in list
 	 * to userland, and userland should return it back to us.
 	 * Then, we will restart with new entry.
 	 *
 	 * The resulting cache snapshot can be inaccurate if flow expiration
 	 * is taking place on hash item between userland data requests for
 	 * this hash item id.
 	 */
 	resp->nentries = 0;
 	for (; i < NBUCKETS; hsh++, i++) {
 		int list_id;
 
 		if (mtx_trylock(&hsh->mtx) == 0) {
 			/*
 			 * Requested hash index is not available,
 			 * relay decision to skip or re-request data
 			 * to userland.
 			 */
 			resp->hash_id = i;
 			resp->list_id = 0;
 			return (0);
 		}
 
 		list_id = 0;
 		TAILQ_FOREACH(fle, &hsh->head, fle_hash) {
 			/* Somebody is waiting for this bucket: yield. */
 			if (hsh->mtx.mtx_lock & MTX_CONTESTED) {
 				resp->hash_id = i;
 				resp->list_id = list_id;
 				mtx_unlock(&hsh->mtx);
 				return (0);
 			}
 
 			list_id++;
 			/* Search for particular record in list. */
 			if (req->list_id > 0) {
 				if (list_id < req->list_id)
 					continue;
 
 				/* Requested list position found. */
 				req->list_id = 0;
 			}
 #ifdef INET6
 			if (req->version == 6) {
 				struct flow6_entry *fle6;
 
 				fle6 = (struct flow6_entry *)fle;
 				bcopy(&fle6->f, data6 + resp->nentries,
 				    sizeof(fle6->f));
 			} else
 #endif
 				bcopy(&fle->f, data + resp->nentries,
 				    sizeof(fle->f));
 			resp->nentries++;
 			if (resp->nentries == max) {
 				resp->hash_id = i;
 				/*
 				 * If it was the last item in list
 				 * we simply skip to next hash_id.
 				 */
 				resp->list_id = list_id + 1;
 				mtx_unlock(&hsh->mtx);
 				return (0);
 			}
 		}
 		/*
 		 * The resume position only applies to the bucket it was
 		 * issued for.  If that bucket turned out to be shorter
 		 * than the position (we stopped right after its last
 		 * entry, or flows expired meanwhile), drop it so that the
 		 * following buckets are reported from their first entry.
 		 */
 		req->list_id = 0;
 		mtx_unlock(&hsh->mtx);
 	}
 
 	resp->hash_id = resp->list_id = 0;
 
 	return (0);
 }
 
 /*
  * Finish the NetFlow v5 datagram held in 'item' and pass it on.
  *
  * The mbuf length is set from the record count, the export header is
  * filled in (the count is converted to network order last, after it has
  * been used for the length and the flow sequence), and the item is
  * forwarded to the export hook.  Without an export hook the item is freed.
  *
  * Returns the forwarding error, or 0 when the item was freed.
  */
 static int
 export_send(priv_p priv, fib_export_p fe, item_p item, int flags)
 {
 	struct netflow_v5_export_dgram *dgram;
 	struct netflow_v5_header *hdr;
 	struct timespec now;
 	struct mbuf *m;
 	int error;
 
 	error = 0;
 	m = NGI_M(item);
 	dgram = mtod(m, struct netflow_v5_export_dgram *);
 	hdr = &dgram->header;
 
 	/* Fill mbuf header. */
 	m->m_pkthdr.len = sizeof(struct netflow_v5_record) * hdr->count +
 	    sizeof(struct netflow_v5_header);
 	m->m_len = m->m_pkthdr.len;
 
 	/* Fill export header. */
 	hdr->sys_uptime = htonl(MILLIUPTIME(time_uptime));
 	getnanotime(&now);
 	hdr->unix_secs = htonl(now.tv_sec);
 	hdr->unix_nsecs = htonl(now.tv_nsec);
 	hdr->engine_type = 0;
 	hdr->engine_id = fe->domain_id;
 	hdr->pad = 0;
 	/* The sequence number sent is the value before this datagram. */
 	hdr->flow_seq = htonl(atomic_fetchadd_32(&fe->flow_seq, hdr->count));
 	hdr->count = htons(hdr->count);
 
 	if (priv->export == NULL) {
 		NG_FREE_ITEM(item);
 		return (error);
 	}
 
 	NG_FWD_ITEM_HOOK_FLAGS(error, item, priv->export, flags);
 
 	return (error);
 }
 
 
 /*
  * Append one flow to the NetFlow v5 datagram held in 'item'.
  *
  * The next free record slot is taken, the header's record count is
  * incremented and the record is filled from 'fle'.
  *
  * Returns 1 when the datagram now holds NETFLOW_V5_MAX_RECORDS records
  * (end of datagram), 0 otherwise.
  */
 static int
 export_add(item_p item, struct flow_entry *fle)
 {
 	struct netflow_v5_export_dgram *dgram;
 	struct netflow_v5_header *hdr;
 	struct netflow_v5_record *rec;
 
 	dgram = mtod(NGI_M(item), struct netflow_v5_export_dgram *);
 	hdr = &dgram->header;
 
 	/* Claim the next slot; the count is still in host order here. */
 	rec = &dgram->r[hdr->count++];
 
 	KASSERT(hdr->count <= NETFLOW_V5_MAX_RECORDS,
 	    ("ng_netflow: export too big"));
 
 	/* Flow key. */
 	rec->src_addr = fle->f.r.r_src.s_addr;
 	rec->dst_addr = fle->f.r.r_dst.s_addr;
 	rec->next_hop = fle->f.next_hop.s_addr;
 	rec->i_ifx = htons(fle->f.fle_i_ifx);
 	rec->o_ifx = htons(fle->f.fle_o_ifx);
 
 	/* Counters and timestamps. */
 	rec->packets = htonl(fle->f.packets);
 	rec->octets = htonl(fle->f.bytes);
 	rec->first = htonl(MILLIUPTIME(fle->f.first));
 	rec->last = htonl(MILLIUPTIME(fle->f.last));
 
 	/* Transport and IP details. */
 	rec->s_port = fle->f.r.r_sport;
 	rec->d_port = fle->f.r.r_dport;
 	rec->flags = fle->f.tcp_flags;
 	rec->prot = fle->f.r.r_ip_p;
 	rec->tos = fle->f.r.r_tos;
 	rec->dst_mask = fle->f.dst_mask;
 	rec->src_mask = fle->f.src_mask;
 	rec->pad1 = 0;
 	rec->pad2 = 0;
 
 	/* Not supported fields. */
 	rec->dst_as = 0;
 	rec->src_as = 0;
 
 	/* Non-zero tells the caller the datagram is full. */
 	return (hdr->count == NETFLOW_V5_MAX_RECORDS);
 }
 
 /*
  * Periodic flow expiry run.
  *
  * Walks every bucket of the IPv4 hash (and of the IPv6 hash when built
  * with INET6), expiring entries that are inactive or aged.  Buckets whose
  * mutex cannot be taken immediately are skipped, and a bucket is left
  * early as soon as its mutex shows MTX_CONTESTED.  The callout re-arms
  * itself to run again after hz ticks.
  */
 void
 ng_netflow_expire(void *arg)
 {
 	priv_p			priv = (priv_p )arg;
 	struct flow_hash_entry	*bucket;
 	struct flow_entry	*cur, *nxt;
 	int			nflows, idx;
 
 	/* IPv4 cache. */
 	nflows = uma_zone_get_cur(priv->zone);
 	for (idx = 0, bucket = priv->hash; idx < NBUCKETS; idx++, bucket++) {
 		/* Bucket is being worked on by someone else; skip it. */
 		if (mtx_trylock(&bucket->mtx) == 0)
 			continue;
 
 		TAILQ_FOREACH_SAFE(cur, &bucket->head, fle_hash, nxt) {
 			/* Another thread wants this bucket: bail out. */
 			if (bucket->mtx.mtx_lock & MTX_CONTESTED)
 				break;
 
 			/*
 			 * While the cache is lightly populated, stop at
 			 * the first entry that is still active.
 			 */
 			if (nflows <= (NBUCKETS*2) && !INACTIVE(cur))
 				break;
 
 			/* Nothing to do unless the entry qualifies. */
 			if (!((INACTIVE(cur) && (SMALL(cur) ||
 			    (nflows > (NBUCKETS*2)))) || AGED(cur)))
 				continue;
 
 			TAILQ_REMOVE(&bucket->head, cur, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, cur->f.r.fib),
 			    cur, NG_NOFLAGS);
 			nflows--;
 			counter_u64_add(priv->nfinfo_inact_exp, 1);
 		}
 		mtx_unlock(&bucket->mtx);
 	}
 
 #ifdef INET6
 	/* IPv6 cache: same policy, entries viewed as flow6_entry. */
 	nflows = uma_zone_get_cur(priv->zone6);
 	for (idx = 0, bucket = priv->hash6; idx < NBUCKETS; idx++, bucket++) {
 		struct flow6_entry	*cur6;
 
 		/* Bucket is being worked on by someone else; skip it. */
 		if (mtx_trylock(&bucket->mtx) == 0)
 			continue;
 
 		TAILQ_FOREACH_SAFE(cur, &bucket->head, fle_hash, nxt) {
 			/* Another thread wants this bucket: bail out. */
 			if (bucket->mtx.mtx_lock & MTX_CONTESTED)
 				break;
 
 			cur6 = (struct flow6_entry *)cur;
 
 			/*
 			 * While the cache is lightly populated, stop at
 			 * the first entry that is still active.
 			 */
 			if (nflows <= (NBUCKETS*2) && !INACTIVE(cur6))
 				break;
 
 			/* Nothing to do unless the entry qualifies. */
 			if (!((INACTIVE(cur6) && (SMALL(cur6) ||
 			    (nflows > (NBUCKETS*2)))) || AGED(cur6)))
 				continue;
 
 			TAILQ_REMOVE(&bucket->head, cur, fle_hash);
 			expire_flow(priv, priv_to_fib(priv, cur->f.r.fib),
 			    cur, NG_NOFLAGS);
 			nflows--;
 			counter_u64_add(priv->nfinfo_inact_exp, 1);
 		}
 		mtx_unlock(&bucket->mtx);
 	}
 #endif
 
 	/* Schedule next expire. */
 	callout_reset(&priv->exp_callout, hz, ng_netflow_expire, priv);
 }
Index: projects/release-pkg/sys/netinet/tcp_input.c
===================================================================
--- projects/release-pkg/sys/netinet/tcp_input.c	(revision 293335)
+++ projects/release-pkg/sys/netinet/tcp_input.c	(revision 293336)
@@ -1,3841 +1,3827 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 2007-2008,2010
  *	Swinburne University of Technology, Melbourne, Australia.
  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
  * Copyright (c) 2010 The FreeBSD Foundation
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * Portions of this software were developed at the Centre for Advanced Internet
  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
  * James Healy and David Hayes, made possible in part by a grant from the Cisco
  * University Research Program Fund at Community Foundation Silicon Valley.
  *
  * Portions of this software were developed at the Centre for Advanced
  * Internet Architectures, Swinburne University of Technology, Melbourne,
  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipfw.h"		/* for ipfw_fwd	*/
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/hhook.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #define TCPSTATES		/* for logging */
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>	/* required for icmp_var.h */
 #include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #ifdef TCP_RFC7413
 #include <netinet/tcp_fastopen.h>
 #endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet6/tcp6_var.h>
 #include <netinet/tcpip.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #include <netinet/tcp_syncache.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Tunables used by the TCP input path.
  *
  * tcprexmtthresh is a compile-time constant and tcp_log_in_vain a plain
  * global; everything declared with VNET_DEFINE() below is a per-VNET
  * variable.  Each SYSCTL_INT() exports its variable read-write under the
  * _net_inet_tcp node (or the "ecn" child node declared here), with the
  * description given as its last argument.
  */
 const int tcprexmtthresh = 3;
 
 int tcp_log_in_vain = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
     &tcp_log_in_vain, 0,
     "Log all incoming TCP segments to closed ports");
 
 VNET_DEFINE(int, blackhole) = 0;
 #define	V_blackhole		VNET(blackhole)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(blackhole), 0,
     "Do not send RST on segments to closed ports");
 
 VNET_DEFINE(int, tcp_delack_enabled) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_delack_enabled), 0,
     "Delay ACK to try and piggyback it onto a data packet");
 
 VNET_DEFINE(int, drop_synfin) = 0;
 #define	V_drop_synfin		VNET(drop_synfin)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(drop_synfin), 0,
     "Drop TCP packets with SYN+FIN set");
 
 VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc6675_pipe, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc6675_pipe), 0,
     "Use calculated pipe/in-flight bytes per RFC 6675");
 
 VNET_DEFINE(int, tcp_do_rfc3042) = 1;
 #define	V_tcp_do_rfc3042	VNET(tcp_do_rfc3042)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc3042), 0,
     "Enable RFC 3042 (Limited Transmit)");
 
 VNET_DEFINE(int, tcp_do_rfc3390) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc3390), 0,
     "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
 
 VNET_DEFINE(int, tcp_initcwnd_segments) = 10;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, initcwnd_segments,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_initcwnd_segments), 0,
     "Slow-start flight size (initial congestion window) in number of segments");
 
 VNET_DEFINE(int, tcp_do_rfc3465) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc3465), 0,
     "Enable RFC 3465 (Appropriate Byte Counting)");
 
 /*
  * NOTE(review): this entry passes 2 as the SYSCTL_INT value argument where
  * every neighbour passes 0; presumably harmless because a variable pointer
  * is supplied - confirm.
  */
 VNET_DEFINE(int, tcp_abc_l_var) = 2;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, abc_l_var, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_abc_l_var), 2,
     "Cap the max cwnd increment during slow-start to this number of segments");
 
 /* Parent node for the ECN knobs that follow. */
 static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn, CTLFLAG_RW, 0, "TCP ECN");
 
 VNET_DEFINE(int, tcp_do_ecn) = 0;
 SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_ecn), 0,
     "TCP ECN support");
 
 VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
 SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_ecn_maxretries), 0,
     "Max retries before giving up on ECN");
 
 VNET_DEFINE(int, tcp_insecure_syn) = 0;
 #define	V_tcp_insecure_syn	VNET(tcp_insecure_syn)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_insecure_syn), 0,
     "Follow RFC793 instead of RFC5961 criteria for accepting SYN packets");
 
 VNET_DEFINE(int, tcp_insecure_rst) = 0;
 #define	V_tcp_insecure_rst	VNET(tcp_insecure_rst)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_insecure_rst), 0,
     "Follow RFC793 instead of RFC5961 criteria for accepting RST packets");
 
 VNET_DEFINE(int, tcp_recvspace) = 1024*64;
 #define	V_tcp_recvspace	VNET(tcp_recvspace)
 SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_recvspace), 0, "Initial receive socket buffer size");
 
 VNET_DEFINE(int, tcp_do_autorcvbuf) = 1;
 #define	V_tcp_do_autorcvbuf	VNET(tcp_do_autorcvbuf)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_autorcvbuf), 0,
     "Enable automatic receive buffer sizing");
 
 VNET_DEFINE(int, tcp_autorcvbuf_inc) = 16*1024;
 #define	V_tcp_autorcvbuf_inc	VNET(tcp_autorcvbuf_inc)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_autorcvbuf_inc), 0,
     "Incrementor step size of automatic receive buffer");
 
 VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024;
 #define	V_tcp_autorcvbuf_max	VNET(tcp_autorcvbuf_max)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_autorcvbuf_max), 0,
     "Max size of automatic receive buffer");
 
 VNET_DEFINE(struct inpcbhead, tcb);
 #define	tcb6	tcb  /* for KAME src sync over BSD*'s */
 VNET_DEFINE(struct inpcbinfo, tcbinfo);
 
 /*
  * TCP statistics are stored in an "array" of counter(9)s.
  */
 VNET_PCPUSTAT_DEFINE(struct tcpstat, tcpstat);
 VNET_PCPUSTAT_SYSINIT(tcpstat);
 SYSCTL_VNET_PCPUSTAT(_net_inet_tcp, TCPCTL_STATS, stats, struct tcpstat,
     tcpstat, "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
 
 /* The matching teardown is only registered for VIMAGE kernels. */
 #ifdef VIMAGE
 VNET_PCPUSTAT_SYSUNINIT(tcpstat);
 #endif /* VIMAGE */
 /*
  * Kernel module interface for updating tcpstat.  The argument is an index
  * into tcpstat treated as an array; the selected counter is incremented
  * by one.  No range check is performed on 'statnum' here.
  */
 void
 kmod_tcpstat_inc(int statnum)
 {
 
 	counter_u64_add(VNET(tcpstat)[statnum], 1);
 }
 
 /*
  * Wrapper for the TCP established input helper hook.
  */
 void
 hhook_run_tcp_est_in(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
 {
 	struct tcp_hhook_data hhook_data;
 
 	if (V_tcp_hhh[HHOOK_TCP_EST_IN]->hhh_nhooks > 0) {
 		hhook_data.tp = tp;
 		hhook_data.th = th;
 		hhook_data.to = to;
 
 		hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_IN], &hhook_data,
 		    tp->osd);
 	}
 }
 
 /*
  * CC wrapper hook functions
  */
 /*
  * Record an incoming ACK in the congestion control state and pass it to
  * the CC module's ack_received hook, if the module provides one.
  *
  * The inpcb write lock must be held (asserted below).
  */
 void
 cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th);
 	/* Tell the CC module whether cwnd, not the peer's window, limits us. */
 	if (tp->snd_cwnd <= tp->snd_wnd)
 		tp->ccv->flags |= CCF_CWND_LIMITED;
 	else
 		tp->ccv->flags &= ~CCF_CWND_LIMITED;
 
 	if (type == CC_ACK) {
 		if (tp->snd_cwnd > tp->snd_ssthresh) {
 			/*
 			 * Accumulate acked bytes, capped per ACK, and flag
 			 * CCF_ABC_SENTAWND once a full cwnd has been acked.
 			 */
 			tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
-			     V_tcp_abc_l_var * tp->t_maxseg);
+			     V_tcp_abc_l_var * tcp_maxseg(tp));
 			if (tp->t_bytes_acked >= tp->snd_cwnd) {
 				tp->t_bytes_acked -= tp->snd_cwnd;
 				tp->ccv->flags |= CCF_ABC_SENTAWND;
 			}
 		} else {
 				tp->ccv->flags &= ~CCF_ABC_SENTAWND;
 				tp->t_bytes_acked = 0;
 		}
 	}
 
 	if (CC_ALGO(tp)->ack_received != NULL) {
 		/* XXXLAS: Find a way to live without this */
 		tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->ack_received(tp->ccv, type);
 	}
 }
 
 /*
  * Seed a connection's RTT estimate, slow-start threshold and initial
  * congestion window, then call the CC module's conn_init hook if the
  * module provides one.
  *
  * RTT and ssthresh are taken from the metrics returned by tcp_hc_get()
  * when those are non-zero.  The inpcb write lock must be held (asserted
  * below).
  */
 void 
 cc_conn_init(struct tcpcb *tp)
 {
 	struct hc_metrics_lite metrics;
 	struct inpcb *inp = tp->t_inpcb;
+	u_int maxseg;
 	int rtt;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	tcp_hc_get(&inp->inp_inc, &metrics);
+	maxseg = tcp_maxseg(tp);
 
 	/* Only use the cached RTT when we have no estimate of our own. */
 	if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) {
 		tp->t_srtt = rtt;
 		tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE;
 		TCPSTAT_INC(tcps_usedrtt);
 		if (metrics.rmx_rttvar) {
 			tp->t_rttvar = metrics.rmx_rttvar;
 			TCPSTAT_INC(tcps_usedrttvar);
 		} else {
 			/* default variation is +- 1 rtt */
 			tp->t_rttvar =
 			    tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
 		}
 		TCPT_RANGESET(tp->t_rxtcur,
 		    ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
 		    tp->t_rttmin, TCPTV_REXMTMAX);
 	}
 	if (metrics.rmx_ssthresh) {
 		/*
 		 * There's some sort of gateway or interface
 		 * buffer limit on the path.  Use this to set
 		 * the slow start threshold, but set the
 		 * threshold to no less than 2*mss.
 		 */
-		tp->snd_ssthresh = max(2 * tp->t_maxseg, metrics.rmx_ssthresh);
+		tp->snd_ssthresh = max(2 * maxseg, metrics.rmx_ssthresh);
 		TCPSTAT_INC(tcps_usedssthresh);
 	}
 
 	/*
 	 * Set the initial slow-start flight size.
 	 *
 	 * RFC5681 Section 3.1 specifies the default conservative values.
 	 * RFC3390 specifies slightly more aggressive values.
 	 * RFC6928 increases it to ten segments.
 	 * Support for user specified value for initial flight size.
 	 *
 	 * If a SYN or SYN/ACK was lost and retransmitted, we have to
 	 * reduce the initial CWND to one segment as congestion is likely
 	 * requiring us to be cautious.
 	 */
 	if (tp->snd_cwnd == 1)
-		tp->snd_cwnd = tp->t_maxseg;		/* SYN(-ACK) lost */
+		tp->snd_cwnd = maxseg;		/* SYN(-ACK) lost */
 	else if (V_tcp_initcwnd_segments)
-		tp->snd_cwnd = min(V_tcp_initcwnd_segments * tp->t_maxseg,
-		    max(2 * tp->t_maxseg, V_tcp_initcwnd_segments * 1460));
+		tp->snd_cwnd = min(V_tcp_initcwnd_segments * maxseg,
+		    max(2 * maxseg, V_tcp_initcwnd_segments * 1460));
 	else if (V_tcp_do_rfc3390)
-		tp->snd_cwnd = min(4 * tp->t_maxseg,
-		    max(2 * tp->t_maxseg, 4380));
+		tp->snd_cwnd = min(4 * maxseg, max(2 * maxseg, 4380));
 	else {
 		/* Per RFC5681 Section 3.1 */
-		if (tp->t_maxseg > 2190)
-			tp->snd_cwnd = 2 * tp->t_maxseg;
-		else if (tp->t_maxseg > 1095)
-			tp->snd_cwnd = 3 * tp->t_maxseg;
+		if (maxseg > 2190)
+			tp->snd_cwnd = 2 * maxseg;
+		else if (maxseg > 1095)
+			tp->snd_cwnd = 3 * maxseg;
 		else
-			tp->snd_cwnd = 4 * tp->t_maxseg;
+			tp->snd_cwnd = 4 * maxseg;
 	}
 
 	if (CC_ALGO(tp)->conn_init != NULL)
 		CC_ALGO(tp)->conn_init(tp->ccv);
 }
 
 /*
  * Apply the stack-side reaction to a congestion event of the given type
  * and then forward the event to the CC module's cong_signal hook, if the
  * module provides one.
  *
  * 'th' may be NULL; ccv->curack is only refreshed when it is not.  The
  * inpcb write lock must be held (asserted below).
  */
 void inline
 cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
 {
+	u_int maxseg;
+
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	switch(type) {
 	case CC_NDUPACK:
 		if (!IN_FASTRECOVERY(tp->t_flags)) {
 			tp->snd_recover = tp->snd_max;
 			if (tp->t_flags & TF_ECN_PERMIT)
 				tp->t_flags |= TF_ECN_SND_CWR;
 		}
 		break;
 	case CC_ECN:
 		if (!IN_CONGRECOVERY(tp->t_flags)) {
 			TCPSTAT_INC(tcps_ecn_rcwnd);
 			tp->snd_recover = tp->snd_max;
 			if (tp->t_flags & TF_ECN_PERMIT)
 				tp->t_flags |= TF_ECN_SND_CWR;
 		}
 		break;
 	case CC_RTO:
+		maxseg = tcp_maxseg(tp);
 		tp->t_dupacks = 0;
 		tp->t_bytes_acked = 0;
 		EXIT_RECOVERY(tp->t_flags);
 		/*
 		 * ssthresh becomes half of the smaller of the send and
 		 * congestion windows, in whole segments and at least two
 		 * of them; cwnd restarts at one segment.
 		 */
 		tp->snd_ssthresh = max(2, min(tp->snd_wnd, tp->snd_cwnd) / 2 /
-		    tp->t_maxseg) * tp->t_maxseg;
-		tp->snd_cwnd = tp->t_maxseg;
+		    maxseg) * maxseg;
+		tp->snd_cwnd = maxseg;
 		break;
 	case CC_RTO_ERR:
 		TCPSTAT_INC(tcps_sndrexmitbad);
 		/* RTO was unnecessary, so reset everything. */
 		tp->snd_cwnd = tp->snd_cwnd_prev;
 		tp->snd_ssthresh = tp->snd_ssthresh_prev;
 		tp->snd_recover = tp->snd_recover_prev;
 		if (tp->t_flags & TF_WASFRECOVERY)
 			ENTER_FASTRECOVERY(tp->t_flags);
 		if (tp->t_flags & TF_WASCRECOVERY)
 			ENTER_CONGRECOVERY(tp->t_flags);
 		tp->snd_nxt = tp->snd_max;
 		tp->t_flags &= ~TF_PREVVALID;
 		tp->t_badrxtwin = 0;
 		break;
 	}
 
 	if (CC_ALGO(tp)->cong_signal != NULL) {
 		if (th != NULL)
 			tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->cong_signal(tp->ccv, type);
 	}
 }
 
 /*
  * Call the CC module's post_recovery hook, if the module provides one,
  * with ccv->curack set from the ACK in 'th', and then reset the
  * accumulated acked-byte count.
  *
  * The inpcb write lock must be held (asserted below).
  */
 void inline
 cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* XXXLAS: KASSERT that we're in recovery? */
 
 	if (CC_ALGO(tp)->post_recovery != NULL) {
 		tp->ccv->curack = th->th_ack;
 		CC_ALGO(tp)->post_recovery(tp->ccv);
 	}
 	/* XXXLAS: EXIT_RECOVERY ? */
 	tp->t_bytes_acked = 0;
 }
 
 #ifdef TCP_SIGNATURE
 /*
  * Input-path wrapper around tcp_signature_verify().
  *
  * The header fields are switched with tcp_fields_to_net() for the duration
  * of the check and switched back with tcp_fields_to_host() afterwards, so
  * 'th' is left as the caller passed it.  Returns whatever
  * tcp_signature_verify() returns.
  */
 static inline int
 tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen,
     struct tcpopt *to, struct tcphdr *th, u_int tcpbflag)
 {
 	int ret;
 
 	tcp_fields_to_net(th);
 	ret = tcp_signature_verify(m, off0, tlen, optlen, to, th, tcpbflag);
 	tcp_fields_to_host(th);
 	return (ret);
 }
 #endif
 
 /*
  * Indicate whether this ack should be delayed.  We can delay the ack if
  * the following conditions are met:
  *	- There is no delayed ack timer in progress.
  *	- Our last ack wasn't a 0-sized window. We never want to delay
  *	  the ack that opens up a 0-sized window.
  *	- LRO wasn't used for this segment. We make sure by checking that the
  *	  segment size is not larger than the MSS.
- *	- Delayed acks are enabled or this is a half-synchronized T/TCP
- *	  connection.
  *	- V_tcp_delack_enabled is non-zero, or TF_NEEDSYN is set in t_flags.
  *
  * Note that the macro arguments are expanded without parentheses, so pass
  * plain identifiers only.
  */
 #define DELAY_ACK(tp, tlen)						\
 	((!tcp_timer_active(tp, TT_DELACK) &&				\
 	    (tp->t_flags & TF_RXWIN0SENT) == 0) &&			\
-	    (tlen <= tp->t_maxopd) &&					\
+	    (tlen <= tp->t_maxseg) &&					\
 	    (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
 
 /*
  * Feed the ECN-related bits of an incoming segment to the CC module.
  *
  * Does nothing unless the module provides an ecnpkt_handler hook.  The
  * CCF_IPHDR_CE, CCF_TCPHDR_CWR and CCF_DELACK flags in ccv are brought
  * up to date from the IP TOS byte, the TCP header and t_flags before the
  * hook runs; if the hook leaves CCF_ACKNOW set, the delayed-ACK timer is
  * (re)armed with tcp_delacktime.
  *
  * The inpcb write lock must be held (asserted below).
  */
 static void inline
 cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (CC_ALGO(tp)->ecnpkt_handler == NULL)
 		return;
 
 	/* Any other codepoint leaves CCF_IPHDR_CE as it was. */
 	switch (iptos & IPTOS_ECN_MASK) {
 	case IPTOS_ECN_CE:
 		tp->ccv->flags |= CCF_IPHDR_CE;
 		break;
 	case IPTOS_ECN_ECT0:
 	case IPTOS_ECN_ECT1:
 		tp->ccv->flags &= ~CCF_IPHDR_CE;
 		break;
 	}
 
 	if ((th->th_flags & TH_CWR) != 0)
 		tp->ccv->flags |= CCF_TCPHDR_CWR;
 	else
 		tp->ccv->flags &= ~CCF_TCPHDR_CWR;
 
 	if ((tp->t_flags & TF_DELACK) != 0)
 		tp->ccv->flags |= CCF_DELACK;
 	else
 		tp->ccv->flags &= ~CCF_DELACK;
 
 	CC_ALGO(tp)->ecnpkt_handler(tp->ccv);
 
 	if ((tp->ccv->flags & CCF_ACKNOW) != 0)
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 }
 
 /*
  * TCP input handling is split into multiple parts:
  *   tcp6_input is a thin wrapper around tcp_input for the extended
  *	ip6_protox[] call format in ip6_input
  *   tcp_input handles primary segment validation, inpcb lookup and
  *	SYN processing on listen sockets
  *   tcp_do_segment processes the ACK and text of the segment for
  *	establishing, established and closing connections
  */
 #ifdef INET6
 int
 tcp6_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct in6_ifaddr *ia6;
 	struct ip6_hdr *ip6;
 
 	IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), IPPROTO_DONE);
 
 	/*
 	 * draft-itojun-ipv6-tcp-to-anycast
 	 * better place to put this in?
 	 */
 	ip6 = mtod(m, struct ip6_hdr *);
 	ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
 	if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) {
 		struct ip6_hdr *ip6;
 
 		ifa_free(&ia6->ia_ifa);
 		ip6 = mtod(m, struct ip6_hdr *);
 		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
 			    (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
 		return (IPPROTO_DONE);
 	}
 	if (ia6)
 		ifa_free(&ia6->ia_ifa);
 
 	return (tcp_input(mp, offp, proto));
 }
 #endif /* INET6 */
 
 /*
  * Entry point for an incoming TCP segment (IPv4 and IPv6).
  *
  * mp	pointer to the mbuf chain; the chain is taken over and *mp is
  *	set to NULL.
  * offp	offset of the TCP header from the start of the packet.
  * proto	not referenced in this function.
  *
  * The segment is checksummed, its TCP header length is validated, any TCP
  * options are made contiguous, and the header fields are converted to host
  * order.  The owning inpcb is then looked up (honouring a
  * PACKET_TAG_IPFORWARD tag when the mbuf carries the next-hop flag) and the
  * segment is dispatched:
  *   - no inpcb, or tcpcb missing/CLOSED: dropwithreset (or a silent drop
  *     when blackholing applies);
  *   - inpcb in TIMEWAIT: tcp_twcheck();
  *   - listening socket: syncache_expand() for a pure ACK, syncache_add()
  *     for a valid SYN, various rejects otherwise;
  *   - anything else: the connection's tfb_tcp_do_segment handler.
  *
  * Every explicit return statement returns IPPROTO_DONE.
  *
  * Exit labels:
  *   dropwithreset  release the pcbinfo read lock if held, call
  *		   tcp_dropwithreset() (the mbuf is treated as consumed),
  *		   unlock the inpcb if there is one, then fall into drop.
  *   dropunlock	   release the pcbinfo read lock and the inpcb lock if
  *		   held, then fall into drop.
  *   drop	   free the log string and the mbuf if still owned.
  */
 int
 tcp_input(struct mbuf **mp, int *offp, int proto)
 {
 	struct mbuf *m = *mp;
 	struct tcphdr *th = NULL;
 	struct ip *ip = NULL;
 	struct inpcb *inp = NULL;
 	struct tcpcb *tp = NULL;
 	struct socket *so = NULL;
 	u_char *optp = NULL;
 	int off0;
 	int optlen = 0;
 #ifdef INET
 	int len;
 #endif
 	int tlen = 0, off;
 	int drop_hdrlen;
 	int thflags;
 	int rstreason = 0;	/* For badport_bandlim accounting purposes */
 #ifdef TCP_SIGNATURE
 	uint8_t sig_checked = 0;
 #endif
 	uint8_t iptos = 0;
 	struct m_tag *fwd_tag = NULL;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 	int isipv6;
 #else
 	const void *ip6 = NULL;
 #endif /* INET6 */
 	struct tcpopt to;		/* options in this segment */
 	char *s = NULL;			/* address and port logging */
 	int ti_locked;
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 
 #ifdef INET6
 	/* The address family is derived from the IP version nibble. */
 	isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
 #endif
 
 	off0 = *offp;
 	/* NOTE(review): m already holds *mp from its initializer above. */
 	m = *mp;
 	/* From here on the mbuf chain is owned by this function. */
 	*mp = NULL;
 	to.to_flags = 0;
 	TCPSTAT_INC(tcps_rcvtotal);
 
 #ifdef INET6
 	if (isipv6) {
 		/* IP6_EXTHDR_CHECK() is already done at tcp6_input(). */
 
 		if (m->m_len < (sizeof(*ip6) + sizeof(*th))) {
 			m = m_pullup(m, sizeof(*ip6) + sizeof(*th));
 			if (m == NULL) {
 				TCPSTAT_INC(tcps_rcvshort);
 				return (IPPROTO_DONE);
 			}
 		}
 
 		ip6 = mtod(m, struct ip6_hdr *);
 		th = (struct tcphdr *)((caddr_t)ip6 + off0);
 		tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
 		/*
 		 * Use the checksum result supplied in the packet header
 		 * when it is flagged valid; otherwise compute it here.
 		 * A non-zero th_sum afterwards means a bad checksum.
 		 */
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				th->th_sum = m->m_pkthdr.csum_data;
 			else
 				th->th_sum = in6_cksum_pseudo(ip6, tlen,
 				    IPPROTO_TCP, m->m_pkthdr.csum_data);
 			th->th_sum ^= 0xffff;
 		} else
 			th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen);
 		if (th->th_sum) {
 			TCPSTAT_INC(tcps_rcvbadsum);
 			goto drop;
 		}
 
 		/*
 		 * Be proactive about unspecified IPv6 address in source.
 		 * As we use all-zero to indicate unbounded/unconnected pcb,
 		 * unspecified IPv6 address can be used to confuse us.
 		 *
 		 * Note that packets with an unspecified IPv6 destination are
 		 * already dropped in ip6_input.
 		 */
 		if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
 			/* XXX stat */
 			goto drop;
 		}
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		/*
 		 * Get IP and TCP header together in first mbuf.
 		 * Note: IP leaves IP header in first mbuf.
 		 */
 		if (off0 > sizeof (struct ip)) {
 			ip_stripoptions(m);
 			off0 = sizeof(struct ip);
 		}
 		if (m->m_len < sizeof (struct tcpiphdr)) {
 			if ((m = m_pullup(m, sizeof (struct tcpiphdr)))
 			    == NULL) {
 				TCPSTAT_INC(tcps_rcvshort);
 				return (IPPROTO_DONE);
 			}
 		}
 		ip = mtod(m, struct ip *);
 		th = (struct tcphdr *)((caddr_t)ip + off0);
 		tlen = ntohs(ip->ip_len) - off0;
 
 		/* Same scheme as the IPv6 branch: reuse or compute. */
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
 				th->th_sum = m->m_pkthdr.csum_data;
 			else
 				th->th_sum = in_pseudo(ip->ip_src.s_addr,
 				    ip->ip_dst.s_addr,
 				    htonl(m->m_pkthdr.csum_data + tlen +
 				    IPPROTO_TCP));
 			th->th_sum ^= 0xffff;
 		} else {
 			struct ipovly *ipov = (struct ipovly *)ip;
 
 			/*
 			 * Checksum extended TCP header and data.
 			 */
 			len = off0 + tlen;
 			bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
 			ipov->ih_len = htons(tlen);
 			th->th_sum = in_cksum(m, len);
 			/* Reset length for SDT probes. */
 			ip->ip_len = htons(tlen + off0);
 		}
 
 		if (th->th_sum) {
 			TCPSTAT_INC(tcps_rcvbadsum);
 			goto drop;
 		}
 		/* Re-initialization for later version check */
 		ip->ip_v = IPVERSION;
 	}
 #endif /* INET */
 
 	/* Extract the TOS / traffic class byte for ECN processing. */
 #ifdef INET6
 	if (isipv6)
 		iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 		iptos = ip->ip_tos;
 #endif
 
 	/*
 	 * Check that TCP offset makes sense,
 	 * pull out TCP options and adjust length.		XXX
 	 */
 	off = th->th_off << 2;
 	if (off < sizeof (struct tcphdr) || off > tlen) {
 		TCPSTAT_INC(tcps_rcvbadoff);
 		goto drop;
 	}
 	tlen -= off;	/* tlen is used instead of ti->ti_len */
 	if (off > sizeof (struct tcphdr)) {
 #ifdef INET6
 		if (isipv6) {
 			IP6_EXTHDR_CHECK(m, off0, off, IPPROTO_DONE);
 			ip6 = mtod(m, struct ip6_hdr *);
 			th = (struct tcphdr *)((caddr_t)ip6 + off0);
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			if (m->m_len < sizeof(struct ip) + off) {
 				if ((m = m_pullup(m, sizeof (struct ip) + off))
 				    == NULL) {
 					TCPSTAT_INC(tcps_rcvshort);
 					return (IPPROTO_DONE);
 				}
 				/* m may have changed; refresh the pointers. */
 				ip = mtod(m, struct ip *);
 				th = (struct tcphdr *)((caddr_t)ip + off0);
 			}
 		}
 #endif
 		optlen = off - sizeof (struct tcphdr);
 		optp = (u_char *)(th + 1);
 	}
 	thflags = th->th_flags;
 
 	/*
 	 * Convert TCP protocol specific fields to host format.
 	 */
 	tcp_fields_to_host(th);
 
 	/*
 	 * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options.
 	 */
 	drop_hdrlen = off0 + off;
 
 	/*
 	 * Locate pcb for segment; if we're likely to add or remove a
 	 * connection then first acquire pcbinfo lock.  There are three cases
 	 * where we might discover later we need a write lock despite the
 	 * flags: ACKs moving a connection out of the syncache, ACKs for a
 	 * connection in TIMEWAIT and SYNs not targeting a listening socket.
 	 *
 	 * NOTE(review): the code in this function only ever takes the
 	 * pcbinfo lock in read mode (INP_INFO_RLOCK / INP_INFO_TRY_RLOCK);
 	 * the "write lock" wording here appears to be historical - confirm.
 	 */
 	if ((thflags & (TH_FIN | TH_RST)) != 0) {
 		INP_INFO_RLOCK(&V_tcbinfo);
 		ti_locked = TI_RLOCKED;
 	} else
 		ti_locked = TI_UNLOCKED;
 
 	/*
 	 * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
 	 */
         if (
 #ifdef INET6
 	    (isipv6 && (m->m_flags & M_IP6_NEXTHOP))
 #ifdef INET
 	    || (!isipv6 && (m->m_flags & M_IP_NEXTHOP))
 #endif
 #endif
 #if defined(INET) && !defined(INET6)
 	    (m->m_flags & M_IP_NEXTHOP)
 #endif
 	    )
 		fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
 
 	/*
 	 * Re-entered from below whenever the inpcb found earlier turned out
 	 * to be unusable after relocking or after tcp_twcheck().
 	 */
 findpcb:
 #ifdef INVARIANTS
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	} else {
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	}
 #endif
 #ifdef INET6
 	if (isipv6 && fwd_tag != NULL) {
 		struct sockaddr_in6 *next_hop6;
 
 		/* The next-hop sockaddr is stored right after the tag. */
 		next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1);
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * Already got one like this?
 		 */
 		inp = in6_pcblookup_mbuf(&V_tcbinfo,
 		    &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport,
 		    INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src,
 			    th->th_sport, &next_hop6->sin6_addr,
 			    next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) :
 			    th->th_dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
 		}
 	} else if (isipv6) {
 		inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src,
 		    th->th_sport, &ip6->ip6_dst, th->th_dport,
 		    INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB,
 		    m->m_pkthdr.rcvif, m);
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	if (fwd_tag != NULL) {
 		struct sockaddr_in *next_hop;
 
 		/* The next-hop sockaddr is stored right after the tag. */
 		next_hop = (struct sockaddr_in *)(fwd_tag+1);
 		/*
 		 * Transparently forwarded. Pretend to be the destination.
 		 * already got one like this?
 		 */
 		inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src, th->th_sport,
 		    ip->ip_dst, th->th_dport, INPLOOKUP_WLOCKPCB,
 		    m->m_pkthdr.rcvif, m);
 		if (!inp) {
 			/*
 			 * It's new.  Try to find the ambushing socket.
 			 * Because we've rewritten the destination address,
 			 * any hardware-generated hash is ignored.
 			 */
 			inp = in_pcblookup(&V_tcbinfo, ip->ip_src,
 			    th->th_sport, next_hop->sin_addr,
 			    next_hop->sin_port ? ntohs(next_hop->sin_port) :
 			    th->th_dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_WLOCKPCB, m->m_pkthdr.rcvif);
 		}
 	} else
 		inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
 		    th->th_sport, ip->ip_dst, th->th_dport,
 		    INPLOOKUP_WILDCARD | INPLOOKUP_WLOCKPCB,
 		    m->m_pkthdr.rcvif, m);
 #endif /* INET */
 
 	/*
 	 * If the INPCB does not exist then all data in the incoming
 	 * segment is discarded and an appropriate RST is sent back.
 	 * XXX MRT Send RST using which routing table?
 	 */
 	if (inp == NULL) {
 		/*
 		 * Log communication attempts to ports that are not
 		 * in use.
 		 */
 		if ((tcp_log_in_vain == 1 && (thflags & TH_SYN)) ||
 		    tcp_log_in_vain == 2) {
 			if ((s = tcp_log_vain(NULL, th, (void *)ip, ip6)))
 				log(LOG_INFO, "%s; %s: Connection attempt "
 				    "to closed port\n", s, __func__);
 		}
 		/*
 		 * When blackholing do not respond with a RST but
 		 * completely ignore the segment and drop it.
 		 */
 		if ((V_blackhole == 1 && (thflags & TH_SYN)) ||
 		    V_blackhole == 2)
 			goto dropunlock;
 
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
 	INP_WLOCK_ASSERT(inp);
 	/*
 	 * Copy the packet's flow id and hash type to the inpcb when the
 	 * inpcb has none yet, the packet carries one, and the inpcb either
 	 * has no socket or its socket is not accepting connections.
 	 */
 	if ((inp->inp_flowtype == M_HASHTYPE_NONE) &&
 	    (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) &&
 	    ((inp->inp_socket == NULL) ||
 	    (inp->inp_socket->so_options & SO_ACCEPTCONN) == 0)) {
 		inp->inp_flowid = m->m_pkthdr.flowid;
 		inp->inp_flowtype = M_HASHTYPE_GET(m);
 	}
 #ifdef IPSEC
 	/*
 	 * NOTE(review): with INET6 defined, an IPv6 packet that
 	 * ipsec6_in_reject() accepts falls through the dangling else and
 	 * is also passed to ipsec4_in_reject() - confirm this is intended.
 	 */
 #ifdef INET6
 	if (isipv6 && ipsec6_in_reject(m, inp)) {
 		goto dropunlock;
 	} else
 #endif /* INET6 */
 	if (ipsec4_in_reject(m, inp) != 0) {
 		goto dropunlock;
 	}
 #endif /* IPSEC */
 
 	/*
 	 * Check the minimum TTL for socket.
 	 */
 	if (inp->inp_ip_minttl != 0) {
 #ifdef INET6
 		if (isipv6) {
 			if (inp->inp_ip_minttl > ip6->ip6_hlim)
 				goto dropunlock;
 		} else
 #endif
 		if (inp->inp_ip_minttl > ip->ip_ttl)
 			goto dropunlock;
 	}
 
 	/*
 	 * A previous connection in TIMEWAIT state is supposed to catch stray
 	 * or duplicate segments arriving late.  If this segment was a
 	 * legitimate new connection attempt, the old INPCB gets removed and
 	 * we can try again to find a listening socket.
 	 *
 	 * At this point, due to earlier optimism, we may hold only an inpcb
 	 * lock, and not the inpcbinfo write lock.  If so, we need to try to
 	 * acquire it, or if that fails, acquire a reference on the inpcb,
 	 * drop all locks, acquire a global write lock, and then re-acquire
 	 * the inpcb lock.  We may at that point discover that another thread
 	 * has tried to free the inpcb, in which case we need to loop back
 	 * and try to find a new inpcb to deliver to.
 	 *
 	 * XXXRW: It may be time to rethink timewait locking.
 	 */
 relocked:
 	if (inp->inp_flags & INP_TIMEWAIT) {
 		if (ti_locked == TI_UNLOCKED) {
 			if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
 				/*
 				 * Try-lock failed: hold a reference so the
 				 * inpcb cannot vanish while it is unlocked.
 				 */
 				in_pcbref(inp);
 				INP_WUNLOCK(inp);
 				INP_INFO_RLOCK(&V_tcbinfo);
 				ti_locked = TI_RLOCKED;
 				INP_WLOCK(inp);
 				if (in_pcbrele_wlocked(inp)) {
 					inp = NULL;
 					goto findpcb;
 				}
 			} else
 				ti_locked = TI_RLOCKED;
 		}
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 		if (thflags & TH_SYN)
 			tcp_dooptions(&to, optp, optlen, TO_SYN);
 		/*
 		 * NB: tcp_twcheck unlocks the INP and frees the mbuf.
 		 */
 		if (tcp_twcheck(inp, &to, th, m, tlen))
 			goto findpcb;
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return (IPPROTO_DONE);
 	}
 	/*
 	 * The TCPCB may no longer exist if the connection is winding
 	 * down or it is in the CLOSED state.  Either way we drop the
 	 * segment and send an appropriate response.
 	 */
 	tp = intotcpcb(inp);
 	if (tp == NULL || tp->t_state == TCPS_CLOSED) {
 		rstreason = BANDLIM_RST_CLOSEDPORT;
 		goto dropwithreset;
 	}
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE) {
 		tcp_offload_input(tp, m);
 		m = NULL;	/* consumed by the TOE driver */
 		goto dropunlock;
 	}
 #endif
 
 	/*
 	 * We've identified a valid inpcb, but it could be that we need an
 	 * inpcbinfo write lock but don't hold it.  In this case, attempt to
 	 * acquire using the same strategy as the TIMEWAIT case above.  If we
 	 * relock, we have to jump back to 'relocked' as the connection might
 	 * now be in TIMEWAIT.
 	 */
 #ifdef INVARIANTS
 	if ((thflags & (TH_FIN | TH_RST)) != 0)
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 #endif
 	/*
 	 * The pcbinfo lock is skipped only for a non-SYN segment on an
 	 * ESTABLISHED connection, or a SYN to a LISTEN socket that does
 	 * not have TF_FASTOPEN set.
 	 */
 	if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
 	      (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
 	       !(tp->t_flags & TF_FASTOPEN)))) {
 		if (ti_locked == TI_UNLOCKED) {
 			if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
 				in_pcbref(inp);
 				INP_WUNLOCK(inp);
 				INP_INFO_RLOCK(&V_tcbinfo);
 				ti_locked = TI_RLOCKED;
 				INP_WLOCK(inp);
 				if (in_pcbrele_wlocked(inp)) {
 					inp = NULL;
 					goto findpcb;
 				}
 				goto relocked;
 			} else
 				ti_locked = TI_RLOCKED;
 		}
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	}
 
 #ifdef MAC
 	INP_WLOCK_ASSERT(inp);
 	if (mac_inpcb_check_deliver(inp, m))
 		goto dropunlock;
 #endif
 	so = inp->inp_socket;
 	KASSERT(so != NULL, ("%s: so == NULL", __func__));
 #ifdef TCPDEBUG
 	/* Snapshot state and headers for the tcp_trace() call below. */
 	if (so->so_options & SO_DEBUG) {
 		ostate = tp->t_state;
 #ifdef INET6
 		if (isipv6) {
 			bcopy((char *)ip6, (char *)tcp_saveipgen, sizeof(*ip6));
 		} else
 #endif
 			bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
 		tcp_savetcp = *th;
 	}
 #endif /* TCPDEBUG */
 	/*
 	 * When the socket is accepting connections (the INPCB is in LISTEN
 	 * state) we look into the SYN cache if this is a new connection
 	 * attempt or the completion of a previous one.
 	 */
 	if (so->so_options & SO_ACCEPTCONN) {
 		struct in_conninfo inc;
 
 		KASSERT(tp->t_state == TCPS_LISTEN, ("%s: so accepting but "
 		    "tp not listening", __func__));
 		/* Build the connection 4-tuple used as the syncache key. */
 		bzero(&inc, sizeof(inc));
 #ifdef INET6
 		if (isipv6) {
 			inc.inc_flags |= INC_ISIPV6;
 			inc.inc6_faddr = ip6->ip6_src;
 			inc.inc6_laddr = ip6->ip6_dst;
 		} else
 #endif
 		{
 			inc.inc_faddr = ip->ip_src;
 			inc.inc_laddr = ip->ip_dst;
 		}
 		inc.inc_fport = th->th_sport;
 		inc.inc_lport = th->th_dport;
 		inc.inc_fibnum = so->so_fibnum;
 
 		/*
 		 * Check for an existing connection attempt in syncache if
 		 * the flag is only ACK.  A successful lookup creates a new
 		 * socket appended to the listen queue in SYN_RECEIVED state.
 		 */
 		if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
 
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			/*
 			 * Parse the TCP options here because
 			 * syncookies need access to the reflected
 			 * timestamp.
 			 */
 			tcp_dooptions(&to, optp, optlen, 0);
 			/*
 			 * NB: syncache_expand() doesn't unlock
 			 * inp and tcpinfo locks.
 			 */
 			if (!syncache_expand(&inc, &to, th, &so, m)) {
 				/*
 				 * No syncache entry or ACK was not
 				 * for our SYN/ACK.  Send a RST.
 				 * NB: syncache did its own logging
 				 * of the failure cause.
 				 */
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			}
 #ifdef TCP_RFC7413
 			/*
 			 * Also reached from the SYN path below when
 			 * syncache_add() returns non-zero.
 			 */
 new_tfo_socket:
 #endif
 			if (so == NULL) {
 				/*
 				 * We completed the 3-way handshake
 				 * but could not allocate a socket
 				 * either due to memory shortage,
 				 * listen queue length limits or
 				 * global socket limits.  Send RST
 				 * or wait and have the remote end
 				 * retransmit the ACK for another
 				 * try.
 				 */
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 					log(LOG_DEBUG, "%s; %s: Listen socket: "
 					    "Socket allocation failed due to "
 					    "limits or memory shortage, %s\n",
 					    s, __func__,
 					    V_tcp_sc_rst_sock_fail ?
 					    "sending RST" : "try again");
 				if (V_tcp_sc_rst_sock_fail) {
 					rstreason = BANDLIM_UNLIMITED;
 					goto dropwithreset;
 				} else
 					goto dropunlock;
 			}
 			/*
 			 * Socket is created in state SYN_RECEIVED.
 			 * Unlock the listen socket, lock the newly
 			 * created socket and update the tp variable.
 			 */
 			INP_WUNLOCK(inp);	/* listen socket */
 			inp = sotoinpcb(so);
 			/*
 			 * New connection inpcb is already locked by
 			 * syncache_expand().
 			 */
 			INP_WLOCK_ASSERT(inp);
 			tp = intotcpcb(inp);
 			KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
 			    ("%s: ", __func__));
 #ifdef TCP_SIGNATURE
 			if (sig_checked == 0)  {
 				tcp_dooptions(&to, optp, optlen,
 				    (thflags & TH_SYN) ? TO_SYN : 0);
 				if (!tcp_signature_verify_input(m, off0, tlen,
 				    optlen, &to, th, tp->t_flags)) {
 
 					/*
 					 * In SYN_SENT state if it receives an
 					 * RST, it is allowed for further
 					 * processing.
 					 */
 					if ((thflags & TH_RST) == 0 ||
 					    (tp->t_state == TCPS_SYN_SENT) == 0)
 						goto dropunlock;
 				}
 				sig_checked = 1;
 			}
 #endif
 
 			/*
 			 * Process the segment and the data it
 			 * contains.  tcp_do_segment() consumes
 			 * the mbuf chain and unlocks the inpcb.
 			 */
 			tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
 			    iptos, ti_locked);
 			INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 			return (IPPROTO_DONE);
 		}
 		/*
 		 * Segment flag validation for new connection attempts:
 		 *
 		 * Our (SYN|ACK) response was rejected.
 		 * Check with syncache and remove entry to prevent
 		 * retransmits.
 		 *
 		 * NB: syncache_chkrst does its own logging of failure
 		 * causes.
 		 */
 		if (thflags & TH_RST) {
 			syncache_chkrst(&inc, th);
 			goto dropunlock;
 		}
 		/*
 		 * We can't do anything without SYN.
 		 */
 		if ((thflags & TH_SYN) == 0) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN is missing, segment ignored\n",
 				    s, __func__);
 			TCPSTAT_INC(tcps_badsyn);
 			goto dropunlock;
 		}
 		/*
 		 * (SYN|ACK) is bogus on a listen socket.
 		 */
 		if (thflags & TH_ACK) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN|ACK invalid, segment rejected\n",
 				    s, __func__);
 			syncache_badack(&inc);	/* XXX: Not needed! */
 			TCPSTAT_INC(tcps_badsyn);
 			rstreason = BANDLIM_RST_OPENPORT;
 			goto dropwithreset;
 		}
 		/*
 		 * If the drop_synfin option is enabled, drop all
 		 * segments with both the SYN and FIN bits set.
 		 * This prevents e.g. nmap from identifying the
 		 * TCP/IP stack.
 		 * XXX: Poor reasoning.  nmap has other methods
 		 * and is constantly refining its stack detection
 		 * strategies.
 		 * XXX: This is a violation of the TCP specification
 		 * and was used by RFC1644.
 		 */
 		if ((thflags & TH_FIN) && V_drop_synfin) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Listen socket: "
 				    "SYN|FIN segment ignored (based on "
 				    "sysctl setting)\n", s, __func__);
 			TCPSTAT_INC(tcps_badsyn);
 			goto dropunlock;
 		}
 		/*
 		 * Segment's flags are (SYN) or (SYN|FIN).
 		 *
 		 * TH_PUSH, TH_URG, TH_ECE, TH_CWR are ignored
 		 * as they do not affect the state of the TCP FSM.
 		 * The data pointed to by TH_URG and th_urp is ignored.
 		 */
 		KASSERT((thflags & (TH_RST|TH_ACK)) == 0,
 		    ("%s: Listen socket: TH_RST or TH_ACK set", __func__));
 		KASSERT(thflags & (TH_SYN),
 		    ("%s: Listen socket: TH_SYN not set", __func__));
 #ifdef INET6
 		/*
 		 * If deprecated address is forbidden,
 		 * we do not accept SYN to deprecated interface
 		 * address to prevent any new inbound connection from
 		 * getting established.
 		 * When we do not accept SYN, we send a TCP RST,
 		 * with deprecated source address (instead of dropping
 		 * it).  We compromise it as it is much better for peer
 		 * to send a RST, and RST will be the final packet
 		 * for the exchange.
 		 *
 		 * If we do not forbid deprecated addresses, we accept
 		 * the SYN packet.  RFC2462 does not suggest dropping
 		 * SYN in this case.
 		 * If we decipher RFC2462 5.5.4, it says like this:
 		 * 1. use of deprecated addr with existing
 		 *    communication is okay - "SHOULD continue to be
 		 *    used"
 		 * 2. use of it with new communication:
 		 *   (2a) "SHOULD NOT be used if alternate address
 		 *        with sufficient scope is available"
 		 *   (2b) nothing mentioned otherwise.
 		 * Here we fall into (2b) case as we have no choice in
 		 * our source address selection - we must obey the peer.
 		 *
 		 * The wording in RFC2462 is confusing, and there are
 		 * multiple description text for deprecated address
 		 * handling - worse, they are not exactly the same.
 		 * I believe 5.5.4 is the best one, so we follow 5.5.4.
 		 */
 		if (isipv6 && !V_ip6_use_deprecated) {
 			struct in6_ifaddr *ia6;
 
 			ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */);
 			if (ia6 != NULL &&
 			    (ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
 				ifa_free(&ia6->ia_ifa);
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt to deprecated "
 					"IPv6 address rejected\n",
 					s, __func__);
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			}
 			if (ia6)
 				ifa_free(&ia6->ia_ifa);
 		}
 #endif /* INET6 */
 		/*
 		 * Basic sanity checks on incoming SYN requests:
 		 *   Don't respond if the destination is a link layer
 		 *	broadcast according to RFC1122 4.2.3.10, p. 104.
 		 *   If it is from this socket it must be forged.
 		 *   Don't respond if the source or destination is a
 		 *	global or subnet broad- or multicast address.
 		 *   Note that it is quite possible to receive unicast
 		 *	link-layer packets with a broadcast IP address. Use
 		 *	in_broadcast() to find them.
 		 */
 		if (m->m_flags & (M_BCAST|M_MCAST)) {
 			if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 			    log(LOG_DEBUG, "%s; %s: Listen socket: "
 				"Connection attempt from broad- or multicast "
 				"link layer address ignored\n", s, __func__);
 			goto dropunlock;
 		}
 #ifdef INET6
 		if (isipv6) {
 			if (th->th_dport == th->th_sport &&
 			    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt to/from self "
 					"ignored\n", s, __func__);
 				goto dropunlock;
 			}
 			if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 			    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to multicast "
 					"address ignored\n", s, __func__);
 				goto dropunlock;
 			}
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			if (th->th_dport == th->th_sport &&
 			    ip->ip_dst.s_addr == ip->ip_src.s_addr) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to self "
 					"ignored\n", s, __func__);
 				goto dropunlock;
 			}
 			if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 			    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 			    ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
 			    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
 				if ((s = tcp_log_addrs(&inc, th, NULL, NULL)))
 				    log(LOG_DEBUG, "%s; %s: Listen socket: "
 					"Connection attempt from/to broad- "
 					"or multicast address ignored\n",
 					s, __func__);
 				goto dropunlock;
 			}
 		}
 #endif
 		/*
 		 * SYN appears to be valid.  Create compressed TCP state
 		 * for syncache.
 		 */
 #ifdef TCPDEBUG
 		if (so->so_options & SO_DEBUG)
 			tcp_trace(TA_INPUT, ostate, tp,
 			    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 		TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 		tcp_dooptions(&to, optp, optlen, TO_SYN);
 #ifdef TCP_RFC7413
 		if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
 			goto new_tfo_socket;
 #else
 		syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
 #endif
 		/*
 		 * Entry added to syncache and mbuf consumed.
 		 * Only the listen socket is unlocked by syncache_add().
 		 */
 		if (ti_locked == TI_RLOCKED) {
 			INP_INFO_RUNLOCK(&V_tcbinfo);
 			ti_locked = TI_UNLOCKED;
 		}
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 		return (IPPROTO_DONE);
 	} else if (tp->t_state == TCPS_LISTEN) {
 		/*
 		 * When a listen socket is torn down the SO_ACCEPTCONN
 		 * flag is removed first while connections are drained
 		 * from the accept queue in an unlock/lock cycle of the
 		 * ACCEPT_LOCK, opening a race condition allowing a SYN
 		 * attempt to go through unhandled.
 		 */
 		goto dropunlock;
 	}
 
 #ifdef TCP_SIGNATURE
 	if (sig_checked == 0)  {
 		tcp_dooptions(&to, optp, optlen,
 		    (thflags & TH_SYN) ? TO_SYN : 0);
 		if (!tcp_signature_verify_input(m, off0, tlen, optlen, &to,
 		    th, tp->t_flags)) {
 
 			/*
 			 * In SYN_SENT state if it receives an RST, it is
 			 * allowed for further processing.
 			 */
 			if ((thflags & TH_RST) == 0 ||
 			    (tp->t_state == TCPS_SYN_SENT) == 0)
 				goto dropunlock;
 		}
 		sig_checked = 1;
 	}
 #endif
 
 	TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
 
 	/*
 	 * Segment belongs to a connection in SYN_SENT, ESTABLISHED or later
 	 * state.  tcp_do_segment() always consumes the mbuf chain, unlocks
 	 * the inpcb, and unlocks pcbinfo.
 	 */
 	tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos, ti_locked);
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	return (IPPROTO_DONE);
 
 	/*
 	 * Error exits.  Each label releases what the earlier code may still
 	 * hold and then falls through (or jumps) to the next one.
 	 */
 dropwithreset:
 	TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
 
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		ti_locked = TI_UNLOCKED;
 	}
 #ifdef INVARIANTS
 	else {
 		KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset "
 		    "ti_locked: %d", __func__, ti_locked));
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	}
 #endif
 
 	if (inp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
 		INP_WUNLOCK(inp);
 	} else
 		tcp_dropwithreset(m, th, NULL, tlen, rstreason);
 	m = NULL;	/* mbuf chain got consumed. */
 	goto drop;
 
 dropunlock:
 	/* m is NULL here when the TOE driver already consumed it. */
 	if (m != NULL)
 		TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
 
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		ti_locked = TI_UNLOCKED;
 	}
 #ifdef INVARIANTS
 	else {
 		KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock "
 		    "ti_locked: %d", __func__, ti_locked));
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	}
 #endif
 
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 
 drop:
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	/* s holds the most recent log string built above, if any. */
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	if (m != NULL)
 		m_freem(m);
 	return (IPPROTO_DONE);
 }
 
 void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
     int ti_locked)
 {
 	int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
 	int rstreason, todrop, win;
 	u_long tiwin;
 	char *s;
 	struct in_conninfo *inc;
 	struct mbuf *mfree;
 	struct tcpopt to;
 	int tfo_syn;
 	
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 	thflags = th->th_flags;
 	inc = &tp->t_inpcb->inp_inc;
 	tp->sackhint.last_sack_ack = 0;
 	sack_changed = 0;
 
 	/*
 	 * If this is either a state-changing packet or current state isn't
 	 * established, we require a write lock on tcbinfo.  Otherwise, we
 	 * allow the tcbinfo to be in either alocked or unlocked, as the
 	 * caller may have unnecessarily acquired a write lock due to a race.
 	 */
 	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
 	    tp->t_state != TCPS_ESTABLISHED) {
 		KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
 		    "SYN/FIN/RST/!EST", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	} else {
 #ifdef INVARIANTS
 		if (ti_locked == TI_RLOCKED)
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		else {
 			KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
 			    "ti_locked: %d", __func__, ti_locked));
 			INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 		}
 #endif
 	}
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
 	    __func__));
 	KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
 	    __func__));
 
 #ifdef TCPPCAP
 	/* Save segment, if requested. */
 	tcp_pcap_add(th, m, &(tp->t_inpkts));
 #endif
 
 	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 * XXX: This should be done after segment
 	 * validation to ignore broken/spoofed segs.
 	 */
 	tp->t_rcvtime = ticks;
 	if (TCPS_HAVEESTABLISHED(tp->t_state))
 		tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 
 	/*
 	 * Scale up the window into a 32-bit value.
 	 * For the SYN_SENT state the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
 
 	/*
 	 * TCP ECN processing.
 	 */
 	if (tp->t_flags & TF_ECN_PERMIT) {
 		if (thflags & TH_CWR)
 			tp->t_flags &= ~TF_ECN_SND_ECE;
 		switch (iptos & IPTOS_ECN_MASK) {
 		case IPTOS_ECN_CE:
 			tp->t_flags |= TF_ECN_SND_ECE;
 			TCPSTAT_INC(tcps_ecn_ce);
 			break;
 		case IPTOS_ECN_ECT0:
 			TCPSTAT_INC(tcps_ecn_ect0);
 			break;
 		case IPTOS_ECN_ECT1:
 			TCPSTAT_INC(tcps_ecn_ect1);
 			break;
 		}
 
 		/* Process a packet differently from RFC3168. */
 		cc_ecnpkt_handler(tp, th, iptos);
 
 		/* Congestion experienced. */
 		if (thflags & TH_ECE) {
 			cc_cong_signal(tp, th, CC_ECN);
 		}
 	}
 
 	/*
 	 * Parse options on any incoming segment.
 	 */
 	tcp_dooptions(&to, (u_char *)(th + 1),
 	    (th->th_off << 2) - sizeof(struct tcphdr),
 	    (thflags & TH_SYN) ? TO_SYN : 0);
 
 	/*
 	 * If echoed timestamp is later than the current time,
 	 * fall back to non RFC1323 RTT calculation.  Normalize
 	 * timestamp if syncookies were used when this connection
 	 * was established.
 	 */
 	if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
 		to.to_tsecr -= tp->ts_offset;
 		if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
 			to.to_tsecr = 0;
 	}
 	/*
 	 * If timestamps were negotiated during SYN/ACK they should
 	 * appear on every segment during this session and vice versa.
 	 */
 	if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 	if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 
 	/*
 	 * Process options only when we get SYN/ACK back. The SYN case
 	 * for incoming connections is handled in tcp_syncache.
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 	 * or <SYN,ACK>) segment itself is never scaled.
 	 * XXX this is traditional behavior, may need to be cleaned up.
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 		if ((to.to_flags & TOF_SCALE) &&
 		    (tp->t_flags & TF_REQ_SCALE)) {
 			tp->t_flags |= TF_RCVD_SCALE;
 			tp->snd_scale = to.to_wscale;
 		}
 		/*
 		 * Initial send window.  It will be updated with
 		 * the next incoming segment to the scaled value.
 		 */
 		tp->snd_wnd = th->th_win;
 		if (to.to_flags & TOF_TS) {
 			tp->t_flags |= TF_RCVD_TSTMP;
 			tp->ts_recent = to.to_tsval;
 			tp->ts_recent_age = tcp_ts_getticks();
 		}
 		if (to.to_flags & TOF_MSS)
 			tcp_mss(tp, to.to_mss);
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (to.to_flags & TOF_SACKPERM) == 0)
 			tp->t_flags &= ~TF_SACK_PERMIT;
 	}
 
 	/*
 	 * Header prediction: check for the two common cases
 	 * of a uni-directional data xfer.  If the packet has
 	 * no control flags, is in-sequence, the window didn't
 	 * change and we're not retransmitting, it's a
 	 * candidate.  If the length is zero and the ack moved
 	 * forward, we're the sender side of the xfer.  Just
 	 * free the data acked & wake any higher level process
 	 * that was blocked waiting for space.  If the length
 	 * is non-zero and the ack didn't move, we're the
 	 * receiver side.  If we're getting packets in-order
 	 * (the reassembly queue is empty), add the data to
 	 * the socket buffer and note that we need a delayed ack.
 	 * Make sure that the hidden state-flags are also off.
 	 * Since we check for TCPS_ESTABLISHED first, it can only
 	 * be TH_NEEDSYN.
 	 */
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    th->th_seq == tp->rcv_nxt &&
 	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
 	    tp->snd_nxt == tp->snd_max &&
 	    tiwin && tiwin == tp->snd_wnd && 
 	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
 	    LIST_EMPTY(&tp->t_segq) &&
 	    ((to.to_flags & TOF_TS) == 0 ||
 	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
 
 		/*
 		 * If last ACK falls within this segment's sequence numbers,
 		 * record the timestamp.
 		 * NOTE that the test is modified according to the latest
 		 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
 		 */
 		if ((to.to_flags & TOF_TS) != 0 &&
 		    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
 			tp->ts_recent_age = tcp_ts_getticks();
 			tp->ts_recent = to.to_tsval;
 		}
 
 		if (tlen == 0) {
 			if (SEQ_GT(th->th_ack, tp->snd_una) &&
 			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
 			    !IN_RECOVERY(tp->t_flags) &&
 			    (to.to_flags & TOF_SACK) == 0 &&
 			    TAILQ_EMPTY(&tp->snd_holes)) {
 				/*
 				 * This is a pure ack for outstanding data.
 				 */
 				if (ti_locked == TI_RLOCKED)
 					INP_INFO_RUNLOCK(&V_tcbinfo);
 				ti_locked = TI_UNLOCKED;
 
 				TCPSTAT_INC(tcps_predack);
 
 				/*
 				 * "bad retransmit" recovery.
 				 */
 				if (tp->t_rxtshift == 1 &&
 				    tp->t_flags & TF_PREVVALID &&
 				    (int)(ticks - tp->t_badrxtwin) < 0) {
 					cc_cong_signal(tp, th, CC_RTO_ERR);
 				}
 
 				/*
 				 * Recalculate the transmit timer / rtt.
 				 *
 				 * Some boxes send broken timestamp replies
 				 * during the SYN+ACK phase, ignore
 				 * timestamps of 0 or we could calculate a
 				 * huge RTT and blow up the retransmit timer.
 				 */
 				if ((to.to_flags & TOF_TS) != 0 &&
 				    to.to_tsecr) {
 					u_int t;
 
 					t = tcp_ts_getticks() - to.to_tsecr;
 					if (!tp->t_rttlow || tp->t_rttlow > t)
 						tp->t_rttlow = t;
 					tcp_xmit_timer(tp,
 					    TCP_TS_TO_TICKS(t) + 1);
 				} else if (tp->t_rtttime &&
 				    SEQ_GT(th->th_ack, tp->t_rtseq)) {
 					if (!tp->t_rttlow ||
 					    tp->t_rttlow > ticks - tp->t_rtttime)
 						tp->t_rttlow = ticks - tp->t_rtttime;
 					tcp_xmit_timer(tp,
 							ticks - tp->t_rtttime);
 				}
 				acked = BYTES_THIS_ACK(tp, th);
 
 				/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 				hhook_run_tcp_est_in(tp, th, &to);
 
 				TCPSTAT_INC(tcps_rcvackpack);
 				TCPSTAT_ADD(tcps_rcvackbyte, acked);
 				sbdrop(&so->so_snd, acked);
 				if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
 				    SEQ_LEQ(th->th_ack, tp->snd_recover))
 					tp->snd_recover = th->th_ack - 1;
 				
 				/*
 				 * Let the congestion control algorithm update
 				 * congestion control related information. This
 				 * typically means increasing the congestion
 				 * window.
 				 */
 				cc_ack_received(tp, th, CC_ACK);
 
 				tp->snd_una = th->th_ack;
 				/*
 				 * Pull snd_wl2 up to prevent seq wrap relative
 				 * to th_ack.
 				 */
 				tp->snd_wl2 = th->th_ack;
 				tp->t_dupacks = 0;
 				m_freem(m);
 
 				/*
 				 * If all outstanding data are acked, stop
 				 * retransmit timer, otherwise restart timer
 				 * using current (possibly backed-off) value.
 				 * If process is waiting for space,
 				 * wakeup/selwakeup/signal.  If data
 				 * are ready to send, let tcp_output
 				 * decide between more output or persist.
 				 */
 #ifdef TCPDEBUG
 				if (so->so_options & SO_DEBUG)
 					tcp_trace(TA_INPUT, ostate, tp,
 					    (void *)tcp_saveipgen,
 					    &tcp_savetcp, 0);
 #endif
 				TCP_PROBE3(debug__input, tp, th,
 					mtod(m, const char *));
 				if (tp->snd_una == tp->snd_max)
 					tcp_timer_activate(tp, TT_REXMT, 0);
 				else if (!tcp_timer_active(tp, TT_PERSIST))
 					tcp_timer_activate(tp, TT_REXMT,
 						      tp->t_rxtcur);
 				sowwakeup(so);
 				if (sbavail(&so->so_snd))
 					(void) tp->t_fb->tfb_tcp_output(tp);
 				goto check_delack;
 			}
 		} else if (th->th_ack == tp->snd_una &&
 		    tlen <= sbspace(&so->so_rcv)) {
 			int newsize = 0;	/* automatic sockbuf scaling */
 
 			/*
 			 * This is a pure, in-sequence data packet with
 			 * nothing on the reassembly queue and we have enough
 			 * buffer space to take it.
 			 */
 			if (ti_locked == TI_RLOCKED)
 				INP_INFO_RUNLOCK(&V_tcbinfo);
 			ti_locked = TI_UNLOCKED;
 
 			/* Clean receiver SACK report if present */
 			if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
 				tcp_clean_sackreport(tp);
 			TCPSTAT_INC(tcps_preddat);
 			tp->rcv_nxt += tlen;
 			/*
 			 * Pull snd_wl1 up to prevent seq wrap relative to
 			 * th_seq.
 			 */
 			tp->snd_wl1 = th->th_seq;
 			/*
 			 * Pull rcv_up up to prevent seq wrap relative to
 			 * rcv_nxt.
 			 */
 			tp->rcv_up = tp->rcv_nxt;
 			TCPSTAT_INC(tcps_rcvpack);
 			TCPSTAT_ADD(tcps_rcvbyte, tlen);
 #ifdef TCPDEBUG
 			if (so->so_options & SO_DEBUG)
 				tcp_trace(TA_INPUT, ostate, tp,
 				    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 			TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 
 		/*
 		 * Automatic sizing of receive socket buffer.  Often the send
 		 * buffer size is not optimally adjusted to the actual network
 		 * conditions at hand (delay bandwidth product).  Setting the
 		 * buffer size too small limits throughput on links with high
 		 * bandwidth and high delay (eg. trans-continental/oceanic links).
 		 *
 		 * On the receive side the socket buffer memory is only rarely
 		 * used to any significant extent.  This allows us to be much
 		 * more aggressive in scaling the receive socket buffer.  For
 		 * the case that the buffer space is actually used to a large
 		 * extent and we run out of kernel memory we can simply drop
 		 * the new segments; TCP on the sender will just retransmit it
 		 * later.  Setting the buffer size too big may only consume too
 		 * much kernel memory if the application doesn't read() from
 		 * the socket or packet loss or reordering makes use of the
 		 * reassembly queue.
 		 *
 		 * The criteria to step up the receive buffer one notch are:
 		 *  1. Application has not set receive buffer size with
 		 *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
 		 *  2. the number of bytes received during the time it takes
 		 *     one timestamp to be reflected back to us (the RTT);
 		 *  3. received bytes per RTT is within seven eighth of the
 		 *     current socket buffer size;
 		 *  4. receive buffer size has not hit maximal automatic size;
 		 *
 		 * This algorithm does one step per RTT at most and only if
 		 * we receive a bulk stream w/o packet losses or reorderings.
 		 * Shrinking the buffer during idle times is not necessary as
 		 * it doesn't consume any memory when idle.
 		 *
 		 * TODO: Only step up if the application is actually serving
 		 * the buffer to better manage the socket buffer resources.
 		 */
 			if (V_tcp_do_autorcvbuf &&
 			    (to.to_flags & TOF_TS) &&
 			    to.to_tsecr &&
 			    (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
 				if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) &&
 				    to.to_tsecr - tp->rfbuf_ts < hz) {
 					if (tp->rfbuf_cnt >
 					    (so->so_rcv.sb_hiwat / 8 * 7) &&
 					    so->so_rcv.sb_hiwat <
 					    V_tcp_autorcvbuf_max) {
 						newsize =
 						    min(so->so_rcv.sb_hiwat +
 						    V_tcp_autorcvbuf_inc,
 						    V_tcp_autorcvbuf_max);
 					}
 					/* Start over with next RTT. */
 					tp->rfbuf_ts = 0;
 					tp->rfbuf_cnt = 0;
 				} else
 					tp->rfbuf_cnt += tlen;	/* add up */
 			}
 
 			/* Add data to socket buffer. */
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 				m_freem(m);
 			} else {
 				/*
 				 * Set new socket buffer size.
 				 * Give up when limit is reached.
 				 */
 				if (newsize)
 					if (!sbreserve_locked(&so->so_rcv,
 					    newsize, so, NULL))
 						so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
 				m_adj(m, drop_hdrlen);	/* delayed header drop */
 				sbappendstream_locked(&so->so_rcv, m, 0);
 			}
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
 			if (DELAY_ACK(tp, tlen)) {
 				tp->t_flags |= TF_DELACK;
 			} else {
 				tp->t_flags |= TF_ACKNOW;
 				tp->t_fb->tfb_tcp_output(tp);
 			}
 			goto check_delack;
 		}
 	}
 
 	/*
 	 * Calculate amount of space in receive window,
 	 * and then do TCP input processing.
 	 * Receive window is amount of space in rcv queue,
 	 * but not less than advertised window.
 	 */
 	win = sbspace(&so->so_rcv);
 	if (win < 0)
 		win = 0;
 	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
 
 	/* Reset receive buffer auto scaling when not in bulk receive mode. */
 	tp->rfbuf_ts = 0;
 	tp->rfbuf_cnt = 0;
 
 	switch (tp->t_state) {
 
 	/*
 	 * If the state is SYN_RECEIVED:
 	 *	if seg contains an ACK, but not for our SYN/ACK, send a RST.
 	 */
 	case TCPS_SYN_RECEIVED:
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->snd_una) ||
 		     SEQ_GT(th->th_ack, tp->snd_max))) {
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 		}
 #ifdef TCP_RFC7413
 		if (tp->t_flags & TF_FASTOPEN) {
 			/*
 			 * When a TFO connection is in SYN_RECEIVED, the
 			 * only valid packets are the initial SYN, a
 			 * retransmit/copy of the initial SYN (possibly with
 			 * a subset of the original data), a valid ACK, a
 			 * FIN, or a RST.
 			 */
 			if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			} else if (thflags & TH_SYN) {
 				/* non-initial SYN is ignored */
 				if ((tcp_timer_active(tp, TT_DELACK) || 
 				     tcp_timer_active(tp, TT_REXMT)))
 					goto drop;
 			} else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
 				goto drop;
 			}
 		}
 #endif
 		break;
 
 	/*
 	 * If the state is SYN_SENT:
 	 *	if seg contains an ACK, but not for our SYN, drop the input.
 	 *	if seg contains a RST, then drop the connection.
 	 *	if seg does not contain SYN, then drop it.
 	 * Otherwise this is an acceptable SYN segment
 	 *	initialize tp->rcv_nxt and tp->irs
 	 *	if seg contains ack then advance tp->snd_una
 	 *	if seg contains an ECE and ECN support is enabled, the stream
 	 *	    is ECN capable.
 	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
 	 *	arrange for segment to be acked (eventually)
 	 *	continue processing rest of data/controls, beginning with URG
 	 */
 	case TCPS_SYN_SENT:
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->iss) ||
 		     SEQ_GT(th->th_ack, tp->snd_max))) {
 			rstreason = BANDLIM_UNLIMITED;
 			goto dropwithreset;
 		}
 		if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
 			TCP_PROBE5(connect__refused, NULL, tp,
 			    mtod(m, const char *), tp, th);
 			tp = tcp_drop(tp, ECONNREFUSED);
 		}
 		if (thflags & TH_RST)
 			goto drop;
 		if (!(thflags & TH_SYN))
 			goto drop;
 
 		tp->irs = th->th_seq;
 		tcp_rcvseqinit(tp);
 		if (thflags & TH_ACK) {
 			TCPSTAT_INC(tcps_connects);
 			soisconnected(so);
 #ifdef MAC
 			mac_socketpeer_set_from_mbuf(m, so);
 #endif
 			/* Do window scaling on this connection? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 			}
 			tp->rcv_adv += imin(tp->rcv_wnd,
 			    TCP_MAXWIN << tp->rcv_scale);
 			tp->snd_una++;		/* SYN is acked */
 			/*
 			 * If there's data, delay ACK; if there's also a FIN
 			 * ACKNOW will be turned on later.
 			 */
 			if (DELAY_ACK(tp, tlen) && tlen != 0)
 				tcp_timer_activate(tp, TT_DELACK,
 				    tcp_delacktime);
 			else
 				tp->t_flags |= TF_ACKNOW;
 
 			if ((thflags & TH_ECE) && V_tcp_do_ecn) {
 				tp->t_flags |= TF_ECN_PERMIT;
 				TCPSTAT_INC(tcps_ecn_shs);
 			}
 			
 			/*
 			 * Received <SYN,ACK> in SYN_SENT[*] state.
 			 * Transitions:
 			 *	SYN_SENT  --> ESTABLISHED
 			 *	SYN_SENT* --> FIN_WAIT_1
 			 */
 			tp->t_starttime = ticks;
 			if (tp->t_flags & TF_NEEDFIN) {
 				tcp_state_change(tp, TCPS_FIN_WAIT_1);
 				tp->t_flags &= ~TF_NEEDFIN;
 				thflags &= ~TH_SYN;
 			} else {
 				tcp_state_change(tp, TCPS_ESTABLISHED);
 				TCP_PROBE5(connect__established, NULL, tp,
 				    mtod(m, const char *), tp, th);
 				cc_conn_init(tp);
 				tcp_timer_activate(tp, TT_KEEP,
 				    TP_KEEPIDLE(tp));
 			}
 		} else {
 			/*
 			 * Received initial SYN in SYN-SENT[*] state =>
 			 * simultaneous open.
 			 * If it succeeds, connection is * half-synchronized.
 			 * Otherwise, do 3-way handshake:
 			 *        SYN-SENT -> SYN-RECEIVED
 			 *        SYN-SENT* -> SYN-RECEIVED*
 			 */
 			tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			tcp_state_change(tp, TCPS_SYN_RECEIVED);
 		}
 
 		KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
 		    "ti_locked %d", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
 		 * Advance th->th_seq to correspond to first data byte.
 		 * If data, trim to stay within window,
 		 * dropping FIN if necessary.
 		 */
 		th->th_seq++;
 		if (tlen > tp->rcv_wnd) {
 			todrop = tlen - tp->rcv_wnd;
 			m_adj(m, -todrop);
 			tlen = tp->rcv_wnd;
 			thflags &= ~TH_FIN;
 			TCPSTAT_INC(tcps_rcvpackafterwin);
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
 		}
 		tp->snd_wl1 = th->th_seq - 1;
 		tp->rcv_up = th->th_seq;
 		/*
 		 * Client side of transaction: already sent SYN and data.
 		 * If the remote host used T/TCP to validate the SYN,
 		 * our data will be ACK'd; if so, enter normal data segment
 		 * processing in the middle of step 5, ack processing.
 		 * Otherwise, goto step 6.
 		 */
 		if (thflags & TH_ACK)
 			goto process_ACK;
 
 		goto step6;
 
 	/*
 	 * If the state is LAST_ACK or CLOSING or TIME_WAIT:
 	 *      do normal processing.
 	 *
 	 * NB: Leftover from RFC1644 T/TCP.  Cases to be reused later.
 	 */
 	case TCPS_LAST_ACK:
 	case TCPS_CLOSING:
 		break;  /* continue normal processing */
 	}
 
 	/*
 	 * States other than LISTEN or SYN_SENT.
 	 * First check the RST flag and sequence number since reset segments
 	 * are exempt from the timestamp and connection count tests.  This
 	 * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
 	 * below which allowed reset segments in half the sequence space
 	 * to fall though and be processed (which gives forged reset
 	 * segments with a random sequence number a 50 percent chance of
 	 * killing a connection).
 	 * Then check timestamp, if present.
 	 * Then check the connection count, if present.
 	 * Then check that at least some bytes of segment are within
 	 * receive window.  If segment begins before rcv_nxt,
 	 * drop leading data (and SYN); if nothing left, just ack.
 	 */
 	if (thflags & TH_RST) {
 		/*
 		 * RFC5961 Section 3.2
 		 *
 		 * - RST drops connection only if SEG.SEQ == RCV.NXT.
 		 * - If RST is in window, we send challenge ACK.
 		 *
 		 * Note: to take into account delayed ACKs, we should
 		 *   test against last_ack_sent instead of rcv_nxt.
 		 * Note 2: we handle special case of closed window, not
 		 *   covered by the RFC.
 		 */
 		if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
 		    (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
 
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			KASSERT(ti_locked == TI_RLOCKED,
 			    ("%s: TH_RST ti_locked %d, th %p tp %p",
 			    __func__, ti_locked, th, tp));
 			KASSERT(tp->t_state != TCPS_SYN_SENT,
 			    ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
 			    __func__, th, tp));
 
 			if (V_tcp_insecure_rst ||
 			    tp->last_ack_sent == th->th_seq) {
 				TCPSTAT_INC(tcps_drops);
 				/* Drop the connection. */
 				switch (tp->t_state) {
 				case TCPS_SYN_RECEIVED:
 					so->so_error = ECONNREFUSED;
 					goto close;
 				case TCPS_ESTABLISHED:
 				case TCPS_FIN_WAIT_1:
 				case TCPS_FIN_WAIT_2:
 				case TCPS_CLOSE_WAIT:
 					so->so_error = ECONNRESET;
 				close:
 					tcp_state_change(tp, TCPS_CLOSED);
 					/* FALLTHROUGH */
 				default:
 					tp = tcp_close(tp);
 				}
 			} else {
 				TCPSTAT_INC(tcps_badrst);
 				/* Send challenge ACK. */
 				tcp_respond(tp, mtod(m, void *), th, m,
 				    tp->rcv_nxt, tp->snd_nxt, TH_ACK);
 				tp->last_ack_sent = tp->rcv_nxt;
 				m = NULL;
 			}
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC5961 Section 4.2
 	 * Send challenge ACK for any SYN in synchronized state.
 	 */
 	if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
 	    tp->t_state != TCPS_SYN_RECEIVED) {
 		KASSERT(ti_locked == TI_RLOCKED,
 		    ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 		TCPSTAT_INC(tcps_badsyn);
 		if (V_tcp_insecure_syn &&
 		    SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
 			tp = tcp_drop(tp, ECONNRESET);
 			rstreason = BANDLIM_UNLIMITED;
 		} else {
 			/* Send challenge ACK. */
 			tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
 			    tp->snd_nxt, TH_ACK);
 			tp->last_ack_sent = tp->rcv_nxt;
 			m = NULL;
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
 	 * and it's less than ts_recent, drop it.
 	 */
 	if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
 	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
 
 		/* Check to see if ts_recent is over 24 days old.  */
 		if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
 			/*
 			 * Invalidate ts_recent.  If this segment updates
 			 * ts_recent, the age will be reset later and ts_recent
 			 * will get a valid value.  If it does not, setting
 			 * ts_recent to zero will at least satisfy the
 			 * requirement that zero be placed in the timestamp
 			 * echo reply when ts_recent isn't valid.  The
 			 * age isn't reset until we get a valid ts_recent
 			 * because we don't want out-of-order segments to be
 			 * dropped when ts_recent is old.
 			 */
 			tp->ts_recent = 0;
 		} else {
 			TCPSTAT_INC(tcps_rcvduppack);
 			TCPSTAT_ADD(tcps_rcvdupbyte, tlen);
 			TCPSTAT_INC(tcps_pawsdrop);
 			if (tlen)
 				goto dropafterack;
 			goto drop;
 		}
 	}
 
 	/*
 	 * In the SYN-RECEIVED state, validate that the packet belongs to
 	 * this connection before trimming the data to fit the receive
 	 * window.  Check the sequence number versus IRS since we know
 	 * the sequence numbers haven't wrapped.  This is a partial fix
 	 * for the "LAND" DoS attack.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 
 	todrop = tp->rcv_nxt - th->th_seq;
 	if (todrop > 0) {
 		if (thflags & TH_SYN) {
 			thflags &= ~TH_SYN;
 			th->th_seq++;
 			if (th->th_urp > 1)
 				th->th_urp--;
 			else
 				thflags &= ~TH_URG;
 			todrop--;
 		}
 		/*
 		 * Following if statement from Stevens, vol. 2, p. 960.
 		 */
 		if (todrop > tlen
 		    || (todrop == tlen && (thflags & TH_FIN) == 0)) {
 			/*
 			 * Any valid FIN must be to the left of the window.
 			 * At this point the FIN must be a duplicate or out
 			 * of sequence; drop it.
 			 */
 			thflags &= ~TH_FIN;
 
 			/*
 			 * Send an ACK to resynchronize and drop any data.
 			 * But keep on processing for RST or ACK.
 			 */
 			tp->t_flags |= TF_ACKNOW;
 			todrop = tlen;
 			TCPSTAT_INC(tcps_rcvduppack);
 			TCPSTAT_ADD(tcps_rcvdupbyte, todrop);
 		} else {
 			TCPSTAT_INC(tcps_rcvpartduppack);
 			TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
 		}
 		drop_hdrlen += todrop;	/* drop from the top afterwards */
 		th->th_seq += todrop;
 		tlen -= todrop;
 		if (th->th_urp > todrop)
 			th->th_urp -= todrop;
 		else {
 			thflags &= ~TH_URG;
 			th->th_urp = 0;
 		}
 	}
 
 	/*
 	 * If new data are received on a connection after the
 	 * user processes are gone, then RST the other end.
 	 */
 	if ((so->so_state & SS_NOFDREF) &&
 	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
 		KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
 		    "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
 			    "after socket was closed, "
 			    "sending RST and removing tcpcb\n",
 			    s, __func__, tcpstates[tp->t_state], tlen);
 			free(s, M_TCPLOG);
 		}
 		tp = tcp_close(tp);
 		TCPSTAT_INC(tcps_rcvafterclose);
 		rstreason = BANDLIM_UNLIMITED;
 		goto dropwithreset;
 	}
 
 	/*
 	 * If segment ends after window, drop trailing data
 	 * (and PUSH and FIN); if nothing left, just ACK.
 	 */
 	todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
 	if (todrop > 0) {
 		TCPSTAT_INC(tcps_rcvpackafterwin);
 		if (todrop >= tlen) {
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen);
 			/*
 			 * If window is closed can only take segments at
 			 * window edge, and have to drop data and PUSH from
 			 * incoming segments.  Continue processing, but
 			 * remember to ack.  Otherwise, drop segment
 			 * and ack.
 			 */
 			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
 				tp->t_flags |= TF_ACKNOW;
 				TCPSTAT_INC(tcps_rcvwinprobe);
 			} else
 				goto dropafterack;
 		} else
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
 		m_adj(m, -todrop);
 		tlen -= todrop;
 		thflags &= ~(TH_PUSH|TH_FIN);
 	}
 
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record its timestamp.
 	 * NOTE: 
 	 * 1) That the test incorporates suggestions from the latest
 	 *    proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 * 2) That updating only on newer timestamps interferes with
 	 *    our earlier PAWS tests, so this check should be solely
 	 *    predicated on the sequence space of this segment.
 	 * 3) That we modify the segment boundary check to be 
 	 *        Last.ACK.Sent <= SEG.SEQ + SEG.Len  
 	 *    instead of RFC1323's
 	 *        Last.ACK.Sent < SEG.SEQ + SEG.Len,
 	 *    This modified check allows us to overcome RFC1323's
 	 *    limitations as described in Stevens TCP/IP Illustrated
 	 *    Vol. 2 p.869. In such cases, we can still calculate the
 	 *    RTT correctly when RCV.NXT == Last.ACK.Sent.
 	 */
 	if ((to.to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
 	    SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
 		((thflags & (TH_SYN|TH_FIN)) != 0))) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to.to_tsval;
 	}
 
 	/*
 	 * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
 	 * flag is on (half-synchronized state), then queue data for
 	 * later processing; else drop segment and return.
 	 */
 	if ((thflags & TH_ACK) == 0) {
 		if (tp->t_state == TCPS_SYN_RECEIVED ||
 		    (tp->t_flags & TF_NEEDSYN)) {
 #ifdef TCP_RFC7413
 			if (tp->t_state == TCPS_SYN_RECEIVED &&
 			    tp->t_flags & TF_FASTOPEN) {
 				tp->snd_wnd = tiwin;
 				cc_conn_init(tp);
 			}
 #endif
 			goto step6;
 		} else if (tp->t_flags & TF_ACKNOW)
 			goto dropafterack;
 		else
 			goto drop;
 	}
 
 	/*
 	 * Ack processing.
 	 */
 	switch (tp->t_state) {
 
 	/*
 	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
 	 * ESTABLISHED state and continue processing.
 	 * The ACK was checked above.
 	 */
 	case TCPS_SYN_RECEIVED:
 
 		TCPSTAT_INC(tcps_connects);
 		soisconnected(so);
 		/* Do window scaling? */
 		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 			tp->rcv_scale = tp->request_r_scale;
 			tp->snd_wnd = tiwin;
 		}
 		/*
 		 * Make transitions:
 		 *      SYN-RECEIVED  -> ESTABLISHED
 		 *      SYN-RECEIVED* -> FIN-WAIT-1
 		 */
 		tp->t_starttime = ticks;
 		if (tp->t_flags & TF_NEEDFIN) {
 			tcp_state_change(tp, TCPS_FIN_WAIT_1);
 			tp->t_flags &= ~TF_NEEDFIN;
 		} else {
 			tcp_state_change(tp, TCPS_ESTABLISHED);
 			TCP_PROBE5(accept__established, NULL, tp,
 			    mtod(m, const char *), tp, th);
 #ifdef TCP_RFC7413
 			if (tp->t_tfo_pending) {
 				tcp_fastopen_decrement_counter(tp->t_tfo_pending);
 				tp->t_tfo_pending = NULL;
 
 				/*
 				 * Account for the ACK of our SYN prior to
 				 * regular ACK processing below.
 				 */ 
 				tp->snd_una++;
 			}
 			/*
 			 * TFO connections call cc_conn_init() during SYN
 			 * processing.  Calling it again here for such
 			 * connections is not harmless as it would undo the
 			 * snd_cwnd reduction that occurs when a TFO SYN|ACK
 			 * is retransmitted.
 			 */
 			if (!(tp->t_flags & TF_FASTOPEN))
 #endif
 				cc_conn_init(tp);
 			tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 		}
 		/*
 		 * If segment contains data or ACK, will call tcp_reass()
 		 * later; if not, do so now to pass queued data to user.
 		 */
 		if (tlen == 0 && (thflags & TH_FIN) == 0)
 			(void) tcp_reass(tp, (struct tcphdr *)0, 0,
 			    (struct mbuf *)0);
 		tp->snd_wl1 = th->th_seq - 1;
 		/* FALLTHROUGH */
 
 	/*
 	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
 	 * ACKs.  If the ack is in the range
 	 *	tp->snd_una < th->th_ack <= tp->snd_max
 	 * then advance tp->snd_una to th->th_ack and drop
 	 * data from the retransmission queue.  If this ACK reflects
 	 * more up to date window information we update our window information.
 	 */
 	case TCPS_ESTABLISHED:
 	case TCPS_FIN_WAIT_1:
 	case TCPS_FIN_WAIT_2:
 	case TCPS_CLOSE_WAIT:
 	case TCPS_CLOSING:
 	case TCPS_LAST_ACK:
 		if (SEQ_GT(th->th_ack, tp->snd_max)) {
 			TCPSTAT_INC(tcps_rcvacktoomuch);
 			goto dropafterack;
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    ((to.to_flags & TOF_SACK) ||
 		     !TAILQ_EMPTY(&tp->snd_holes)))
 			sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
 		else
 			/*
 			 * Reset the value so that previous (valid) value
 			 * from the last ack with SACK doesn't get used.
 			 */
 			tp->sackhint.sacked_bytes = 0;
 
 		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 		hhook_run_tcp_est_in(tp, th, &to);
 
 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
+			u_int maxseg;
+
+			maxseg = tcp_maxseg(tp);
 			if (tlen == 0 &&
 			    (tiwin == tp->snd_wnd ||
 			    (tp->t_flags & TF_SACK_PERMIT))) {
 				/*
 				 * If this is the first time we've seen a
 				 * FIN from the remote, this is not a
 				 * duplicate and it needs to be processed
 				 * normally.  This happens during a
 				 * simultaneous close.
 				 */
 				if ((thflags & TH_FIN) &&
 				    (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
 					tp->t_dupacks = 0;
 					break;
 				}
 				TCPSTAT_INC(tcps_rcvdupack);
 				/*
 				 * If we have outstanding data (other than
 				 * a window probe), this is a completely
 				 * duplicate ack (ie, window info didn't
 				 * change and FIN isn't set),
 				 * the ack is the biggest we've
 				 * seen and we've seen exactly our rexmt
 				 * threshhold of them, assume a packet
 				 * has been dropped and retransmit it.
 				 * Kludge snd_nxt & the congestion
 				 * window so we send only this one
 				 * packet.
 				 *
 				 * We know we're losing at the current
 				 * window size so do congestion avoidance
 				 * (set ssthresh to half the current window
 				 * and pull our congestion window back to
 				 * the new ssthresh).
 				 *
 				 * Dup acks mean that packets have left the
 				 * network (they're now cached at the receiver)
 				 * so bump cwnd by the amount in the receiver
 				 * to keep a constant cwnd packets in the
 				 * network.
 				 *
 				 * When using TCP ECN, notify the peer that
 				 * we reduced the cwnd.
 				 */
 				/*
 				 * Following 2 kinds of acks should not affect
 				 * dupack counting:
 				 * 1) Old acks
 				 * 2) Acks with SACK but without any new SACK
 				 * information in them. These could result from
 				 * any anomaly in the network like a switch
 				 * duplicating packets or a possible DoS attack.
 				 */
 				if (th->th_ack != tp->snd_una ||
 				    ((tp->t_flags & TF_SACK_PERMIT) &&
 				    !sack_changed))
 					break;
 				else if (!tcp_timer_active(tp, TT_REXMT))
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks > tcprexmtthresh ||
 				     IN_FASTRECOVERY(tp->t_flags)) {
 					cc_ack_received(tp, th, CC_DUPACK);
 					if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    IN_FASTRECOVERY(tp->t_flags)) {
 						int awnd;
 						
 						/*
 						 * Compute the amount of data in flight first.
 						 * We can inject new data into the pipe iff 
 						 * we have less than 1/2 the original window's
 						 * worth of data in flight.
 						 */
 						if (V_tcp_do_rfc6675_pipe)
 							awnd = tcp_compute_pipe(tp);
 						else
 							awnd = (tp->snd_nxt - tp->snd_fack) +
 								tp->sackhint.sack_bytes_rexmit;
 
 						if (awnd < tp->snd_ssthresh) {
-							tp->snd_cwnd += tp->t_maxseg;
+							tp->snd_cwnd += maxseg;
 							if (tp->snd_cwnd > tp->snd_ssthresh)
 								tp->snd_cwnd = tp->snd_ssthresh;
 						}
 					} else
-						tp->snd_cwnd += tp->t_maxseg;
+						tp->snd_cwnd += maxseg;
 					(void) tp->t_fb->tfb_tcp_output(tp);
 					goto drop;
 				} else if (tp->t_dupacks == tcprexmtthresh) {
 					tcp_seq onxt = tp->snd_nxt;
 
 					/*
 					 * If we're doing sack, check to
 					 * see if we're already in sack
 					 * recovery. If we're not doing sack,
 					 * check to see if we're in newreno
 					 * recovery.
 					 */
 					if (tp->t_flags & TF_SACK_PERMIT) {
 						if (IN_FASTRECOVERY(tp->t_flags)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					} else {
 						if (SEQ_LEQ(th->th_ack,
 						    tp->snd_recover)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					}
 					/* Congestion signal before ack. */
 					cc_cong_signal(tp, th, CC_NDUPACK);
 					cc_ack_received(tp, th, CC_DUPACK);
 					tcp_timer_activate(tp, TT_REXMT, 0);
 					tp->t_rtttime = 0;
 					if (tp->t_flags & TF_SACK_PERMIT) {
 						TCPSTAT_INC(
 						    tcps_sack_recovery_episode);
 						tp->sack_newdata = tp->snd_nxt;
-						tp->snd_cwnd = tp->t_maxseg;
+						tp->snd_cwnd = maxseg;
 						(void) tp->t_fb->tfb_tcp_output(tp);
 						goto drop;
 					}
 					tp->snd_nxt = th->th_ack;
-					tp->snd_cwnd = tp->t_maxseg;
+					tp->snd_cwnd = maxseg;
 					(void) tp->t_fb->tfb_tcp_output(tp);
 					KASSERT(tp->snd_limited <= 2,
 					    ("%s: tp->snd_limited too big",
 					    __func__));
 					tp->snd_cwnd = tp->snd_ssthresh +
-					     tp->t_maxseg *
+					     maxseg *
 					     (tp->t_dupacks - tp->snd_limited);
 					if (SEQ_GT(onxt, tp->snd_nxt))
 						tp->snd_nxt = onxt;
 					goto drop;
 				} else if (V_tcp_do_rfc3042) {
 					/*
 					 * Process first and second duplicate
 					 * ACKs. Each indicates a segment
 					 * leaving the network, creating room
 					 * for more. Make sure we can send a
 					 * packet on reception of each duplicate
 					 * ACK by increasing snd_cwnd by one
 					 * segment. Restore the original
 					 * snd_cwnd after packet transmission.
 					 */
 					cc_ack_received(tp, th, CC_DUPACK);
 					u_long oldcwnd = tp->snd_cwnd;
 					tcp_seq oldsndmax = tp->snd_max;
 					u_int sent;
 					int avail;
 
 					KASSERT(tp->t_dupacks == 1 ||
 					    tp->t_dupacks == 2,
 					    ("%s: dupacks not 1 or 2",
 					    __func__));
 					if (tp->t_dupacks == 1)
 						tp->snd_limited = 0;
 					tp->snd_cwnd =
 					    (tp->snd_nxt - tp->snd_una) +
 					    (tp->t_dupacks - tp->snd_limited) *
-					    tp->t_maxseg;
+					    maxseg;
 					/*
 					 * Only call tcp_output when there
 					 * is new data available to be sent.
 					 * Otherwise we would send pure ACKs.
 					 */
 					SOCKBUF_LOCK(&so->so_snd);
 					avail = sbavail(&so->so_snd) -
 					    (tp->snd_nxt - tp->snd_una);
 					SOCKBUF_UNLOCK(&so->so_snd);
 					if (avail > 0)
 						(void) tp->t_fb->tfb_tcp_output(tp);
 					sent = tp->snd_max - oldsndmax;
-					if (sent > tp->t_maxseg) {
+					if (sent > maxseg) {
 						KASSERT((tp->t_dupacks == 2 &&
 						    tp->snd_limited == 0) ||
-						   (sent == tp->t_maxseg + 1 &&
+						   (sent == maxseg + 1 &&
 						    tp->t_flags & TF_SENTFIN),
 						    ("%s: sent too much",
 						    __func__));
 						tp->snd_limited = 2;
 					} else if (sent > 0)
 						++tp->snd_limited;
 					tp->snd_cwnd = oldcwnd;
 					goto drop;
 				}
 			}
 			break;
 		} else {
 			/*
 			 * This ack is advancing the left edge, reset the
 			 * counter.
 			 */
 			tp->t_dupacks = 0;
 			/*
 			 * If this ack also has new SACK info, increment the
 			 * counter as per rfc6675.
 			 */
 			if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
 				tp->t_dupacks++;
 		}
 
 		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
 		    ("%s: th_ack <= snd_una", __func__));
 
 		/*
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
 		 */
 		if (IN_FASTRECOVERY(tp->t_flags)) {
 			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
 				if (tp->t_flags & TF_SACK_PERMIT)
 					tcp_sack_partialack(tp, th);
 				else
 					tcp_newreno_partial_ack(tp, th);
 			} else
 				cc_post_recovery(tp, th);
 		}
 		/*
 		 * If we reach this point, ACK is not a duplicate,
 		 *     i.e., it ACKs something we sent.
 		 */
 		if (tp->t_flags & TF_NEEDSYN) {
 			/*
 			 * T/TCP: Connection was half-synchronized, and our
 			 * SYN has been ACK'd (so connection is now fully
 			 * synchronized).  Go to non-starred state,
 			 * increment snd_una for ACK of SYN, and check if
 			 * we can do window scaling.
 			 */
 			tp->t_flags &= ~TF_NEEDSYN;
 			tp->snd_una++;
 			/* Do window scaling? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 				/* Send window already scaled. */
 			}
 		}
 
 process_ACK:
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		acked = BYTES_THIS_ACK(tp, th);
 		TCPSTAT_INC(tcps_rcvackpack);
 		TCPSTAT_ADD(tcps_rcvackbyte, acked);
 
 		/*
 		 * If we just performed our first retransmit, and the ACK
 		 * arrives within our recovery window, then it was a mistake
 		 * to do the retransmit in the first place.  Recover our
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
 		if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
 		    (int)(ticks - tp->t_badrxtwin) < 0)
 			cc_cong_signal(tp, th, CC_RTO_ERR);
 
 		/*
 		 * If we have a timestamp reply, update smoothed
 		 * round trip time.  If no timestamp is present but
 		 * transmit timer is running and timed sequence
 		 * number was acked, update smoothed round trip time.
 		 * Since we now have an rtt measurement, cancel the
 		 * timer backoff (cf., Phil Karn's retransmit alg.).
 		 * Recompute the initial retransmit timer.
 		 *
 		 * Some boxes send broken timestamp replies
 		 * during the SYN+ACK phase, ignore
 		 * timestamps of 0 or we could calculate a
 		 * huge RTT and blow up the retransmit timer.
 		 */
 		if ((to.to_flags & TOF_TS) != 0 && to.to_tsecr) {
 			u_int t;
 
 			t = tcp_ts_getticks() - to.to_tsecr;
 			if (!tp->t_rttlow || tp->t_rttlow > t)
 				tp->t_rttlow = t;
 			tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1);
 		} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
 			if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
 				tp->t_rttlow = ticks - tp->t_rtttime;
 			tcp_xmit_timer(tp, ticks - tp->t_rtttime);
 		}
 
 		/*
 		 * If all outstanding data is acked, stop retransmit
 		 * timer and remember to restart (more output or persist).
 		 * If there is more data to be acked, restart retransmit
 		 * timer, using current (possibly backed-off) value.
 		 */
 		if (th->th_ack == tp->snd_max) {
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			needoutput = 1;
 		} else if (!tcp_timer_active(tp, TT_PERSIST))
 			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 
 		/*
 		 * If no data (only SYN) was ACK'd,
 		 *    skip rest of ACK processing.
 		 */
 		if (acked == 0)
 			goto step6;
 
 		/*
 		 * Let the congestion control algorithm update congestion
 		 * control related information. This typically means increasing
 		 * the congestion window.
 		 */
 		cc_ack_received(tp, th, CC_ACK);
 
 		SOCKBUF_LOCK(&so->so_snd);
 		if (acked > sbavail(&so->so_snd)) {
 			tp->snd_wnd -= sbavail(&so->so_snd);
 			mfree = sbcut_locked(&so->so_snd,
 			    (int)sbavail(&so->so_snd));
 			ourfinisacked = 1;
 		} else {
 			mfree = sbcut_locked(&so->so_snd, acked);
 			tp->snd_wnd -= acked;
 			ourfinisacked = 0;
 		}
 		/* NB: sowwakeup_locked() does an implicit unlock. */
 		sowwakeup_locked(so);
 		m_freem(mfree);
 		/* Detect una wraparound. */
 		if (!IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
 		/* XXXLAS: Can this be moved up into cc_post_recovery? */
 		if (IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GEQ(th->th_ack, tp->snd_recover)) {
 			EXIT_RECOVERY(tp->t_flags);
 		}
 		tp->snd_una = th->th_ack;
 		if (tp->t_flags & TF_SACK_PERMIT) {
 			if (SEQ_GT(tp->snd_una, tp->snd_recover))
 				tp->snd_recover = tp->snd_una;
 		}
 		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
 			tp->snd_nxt = tp->snd_una;
 
 		switch (tp->t_state) {
 
 		/*
 		 * In FIN_WAIT_1 STATE in addition to the processing
 		 * for the ESTABLISHED state if our FIN is now acknowledged
 		 * then enter FIN_WAIT_2.
 		 */
 		case TCPS_FIN_WAIT_1:
 			if (ourfinisacked) {
 				/*
 				 * If we can't receive any more
 				 * data, then closing user can proceed.
 				 * Starting the timer is contrary to the
 				 * specification, but if we don't get a FIN
 				 * we'll hang forever.
 				 *
 				 * XXXjl:
 				 * we should release the tp also, and use a
 				 * compressed state.
 				 */
 				if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 					soisdisconnected(so);
 					tcp_timer_activate(tp, TT_2MSL,
 					    (tcp_fast_finwait2_recycle ?
 					    tcp_finwait2_timeout :
 					    TP_MAXIDLE(tp)));
 				}
 				tcp_state_change(tp, TCPS_FIN_WAIT_2);
 			}
 			break;
 
 		/*
 		 * In CLOSING STATE in addition to the processing for
 		 * the ESTABLISHED state if the ACK acknowledges our FIN
 		 * then enter the TIME-WAIT state, otherwise ignore
 		 * the segment.
 		 */
 		case TCPS_CLOSING:
 			if (ourfinisacked) {
 				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 				tcp_twstart(tp);
 				INP_INFO_RUNLOCK(&V_tcbinfo);
 				m_freem(m);
 				return;
 			}
 			break;
 
 		/*
 		 * In LAST_ACK, we may still be waiting for data to drain
 		 * and/or to be acked, as well as for the ack of our FIN.
 		 * If our FIN is now acknowledged, delete the TCB,
 		 * enter the closed state and return.
 		 */
 		case TCPS_LAST_ACK:
 			if (ourfinisacked) {
 				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 				tp = tcp_close(tp);
 				goto drop;
 			}
 			break;
 		}
 	}
 
 step6:
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Update window information.
 	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
 	 */
 	if ((thflags & TH_ACK) &&
 	    (SEQ_LT(tp->snd_wl1, th->th_seq) ||
 	    (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
 	     (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
 		/* keep track of pure window updates */
 		if (tlen == 0 &&
 		    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
 			TCPSTAT_INC(tcps_rcvwinupd);
 		tp->snd_wnd = tiwin;
 		tp->snd_wl1 = th->th_seq;
 		tp->snd_wl2 = th->th_ack;
 		if (tp->snd_wnd > tp->max_sndwnd)
 			tp->max_sndwnd = tp->snd_wnd;
 		needoutput = 1;
 	}
 
 	/*
 	 * Process segments with URG.
 	 */
 	if ((thflags & TH_URG) && th->th_urp &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		/*
 		 * This is a kludge, but if we receive and accept
 		 * random urgent pointers, we'll crash in
 		 * soreceive.  It's hard to imagine someone
 		 * actually wanting to send this much urgent data.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		if (th->th_urp + sbavail(&so->so_rcv) > sb_max) {
 			th->th_urp = 0;			/* XXX */
 			thflags &= ~TH_URG;		/* XXX */
 			SOCKBUF_UNLOCK(&so->so_rcv);	/* XXX */
 			goto dodata;			/* XXX */
 		}
 		/*
 		 * If this segment advances the known urgent pointer,
 		 * then mark the data stream.  This should not happen
 		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
 		 * a FIN has been received from the remote side.
 		 * In these states we ignore the URG.
 		 *
 		 * According to RFC961 (Assigned Protocols),
 		 * the urgent pointer points to the last octet
 		 * of urgent data.  We continue, however,
 		 * to consider it to indicate the first octet
 		 * of data past the urgent section as the original
 		 * spec states (in one of two places).
 		 */
 		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
 			tp->rcv_up = th->th_seq + th->th_urp;
 			so->so_oobmark = sbavail(&so->so_rcv) +
 			    (tp->rcv_up - tp->rcv_nxt) - 1;
 			if (so->so_oobmark == 0)
 				so->so_rcv.sb_state |= SBS_RCVATMARK;
 			sohasoutofband(so);
 			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 		}
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		/*
 		 * Remove out of band data so doesn't get presented to user.
 		 * This can happen independent of advancing the URG pointer,
 		 * but if two URG's are pending at once, some out-of-band
 		 * data may creep in... ick.
 		 */
 		if (th->th_urp <= (u_long)tlen &&
 		    !(so->so_options & SO_OOBINLINE)) {
 			/* hdr drop is delayed */
 			tcp_pulloutofband(so, th, m, drop_hdrlen);
 		}
 	} else {
 		/*
 		 * If no out of band data is expected,
 		 * pull receive urgent pointer along
 		 * with the receive window.
 		 */
 		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
 			tp->rcv_up = tp->rcv_nxt;
 	}
 dodata:							/* XXX */
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Process the segment text, merging it into the TCP sequencing queue,
 	 * and arranging for acknowledgment of receipt if necessary.
 	 * This process logically involves adjusting tp->rcv_wnd as data
 	 * is presented to the user (this happens in tcp_usrreq.c,
 	 * case PRU_RCVD).  If a FIN has already been received on this
 	 * connection then we just ignore the text.
 	 */
 	tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
 		   (tp->t_flags & TF_FASTOPEN));
 	if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		tcp_seq save_start = th->th_seq;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
 		/*
 		 * Insert segment which includes th into TCP reassembly queue
 		 * with control block tp.  Set thflags to whether reassembly now
 		 * includes a segment with FIN.  This handles the common case
 		 * inline (segment is the next to be received on an established
 		 * connection, and the queue is empty), avoiding linkage into
 		 * and removal from the queue and repetition of various
 		 * conversions.
 		 * Set DELACK for segments received in order, but ack
 		 * immediately when segments are out of order (so
 		 * fast retransmit can work).
 		 */
 		if (th->th_seq == tp->rcv_nxt &&
 		    LIST_EMPTY(&tp->t_segq) &&
 		    (TCPS_HAVEESTABLISHED(tp->t_state) ||
 		     tfo_syn)) {
 			if (DELAY_ACK(tp, tlen) || tfo_syn)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt += tlen;
 			thflags = th->th_flags & TH_FIN;
 			TCPSTAT_INC(tcps_rcvpack);
 			TCPSTAT_ADD(tcps_rcvbyte, tlen);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				m_freem(m);
 			else
 				sbappendstream_locked(&so->so_rcv, m, 0);
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
 		} else {
 			/*
 			 * XXX: Due to the header drop above "th" is
 			 * theoretically invalid by now.  Fortunately
 			 * m_adj() doesn't actually free any mbufs
 			 * when trimming from the head.
 			 */
 			thflags = tcp_reass(tp, th, &tlen, m);
 			tp->t_flags |= TF_ACKNOW;
 		}
 		if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
 			tcp_update_sack_list(tp, save_start, save_start + tlen);
 #if 0
 		/*
 		 * Note the amount of data that peer has sent into
 		 * our window, in order to estimate the sender's
 		 * buffer size.
 		 * XXX: Unused.
 		 */
 		if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
 			len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
 		else
 			len = so->so_rcv.sb_hiwat;
 #endif
 	} else {
 		m_freem(m);
 		thflags &= ~TH_FIN;
 	}
 
 	/*
 	 * If FIN is received ACK the FIN and let the user know
 	 * that the connection is closing.
 	 */
 	if (thflags & TH_FIN) {
 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 			socantrcvmore(so);
 			/*
 			 * If connection is half-synchronized
 			 * (ie NEEDSYN flag on) then delay ACK,
 			 * so it may be piggybacked when SYN is sent.
 			 * Otherwise, since we received a FIN then no
 			 * more input can be expected, send ACK now.
 			 */
 			if (tp->t_flags & TF_NEEDSYN)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt++;
 		}
 		switch (tp->t_state) {
 
 		/*
 		 * In SYN_RECEIVED and ESTABLISHED STATES
 		 * enter the CLOSE_WAIT state.
 		 */
 		case TCPS_SYN_RECEIVED:
 			tp->t_starttime = ticks;
 			/* FALLTHROUGH */
 		case TCPS_ESTABLISHED:
 			tcp_state_change(tp, TCPS_CLOSE_WAIT);
 			break;
 
 		/*
 		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
 		 * enter the CLOSING state.
 		 */
 		case TCPS_FIN_WAIT_1:
 			tcp_state_change(tp, TCPS_CLOSING);
 			break;
 
 		/*
 		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
 		 * starting the time-wait timer, turning off the other
 		 * standard timers.
 		 */
 		case TCPS_FIN_WAIT_2:
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
 			    "TCP_FIN_WAIT_2 ti_locked: %d", __func__,
 			    ti_locked));
 
 			tcp_twstart(tp);
 			INP_INFO_RUNLOCK(&V_tcbinfo);
 			return;
 		}
 	}
 	if (ti_locked == TI_RLOCKED)
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	ti_locked = TI_UNLOCKED;
 
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 
 	/*
 	 * Return any desired output.
 	 */
 	if (needoutput || (tp->t_flags & TF_ACKNOW))
 		(void) tp->t_fb->tfb_tcp_output(tp);
 
 check_delack:
 	KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
 	    __func__, ti_locked));
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 	return;
 
 dropafterack:
 	/*
 	 * Generate an ACK dropping incoming segment if it occupies
 	 * sequence space, where the ACK reflects our state.
 	 *
 	 * We can now skip the test for the RST flag since all
 	 * paths to this code happen after packets containing
 	 * RST have been dropped.
 	 *
 	 * In the SYN-RECEIVED state, don't send an ACK unless the
 	 * segment we received passes the SYN-RECEIVED ACK test.
 	 * If it fails send a RST.  This breaks the loop in the
 	 * "LAND" DoS attack, and also prevents an ACK storm
 	 * between two listening ports that have been sent forged
 	 * SYN segments, each with the source address of the other.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
 	    (SEQ_GT(tp->snd_una, th->th_ack) ||
 	     SEQ_GT(th->th_ack, tp->snd_max)) ) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 	if (ti_locked == TI_RLOCKED)
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	ti_locked = TI_UNLOCKED;
 
 	tp->t_flags |= TF_ACKNOW;
 	(void) tp->t_fb->tfb_tcp_output(tp);
 	INP_WUNLOCK(tp->t_inpcb);
 	m_freem(m);
 	return;
 
 dropwithreset:
 	if (ti_locked == TI_RLOCKED)
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	ti_locked = TI_UNLOCKED;
 
 	if (tp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
 		INP_WUNLOCK(tp->t_inpcb);
 	} else
 		tcp_dropwithreset(m, th, NULL, tlen, rstreason);
 	return;
 
 drop:
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		ti_locked = TI_UNLOCKED;
 	}
 #ifdef INVARIANTS
 	else
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 #endif
 
 	/*
 	 * Drop space held by incoming segment and return.
 	 */
 #ifdef TCPDEBUG
 	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 	if (tp != NULL)
 		INP_WUNLOCK(tp->t_inpcb);
 	m_freem(m);
 }
 
 /*
  * Issue RST and make ACK acceptable to originator of segment.
  * The mbuf must still include the original packet header.
  * tp may be NULL.
  *
  * The mbuf chain m is consumed on every path: it is either handed to
  * tcp_respond() or freed at the drop label.  No reset is generated when
  * the segment itself carries RST, when the mbuf is flagged M_BCAST or
  * M_MCAST, when the source or destination IP address is multicast (for
  * IPv4 also when the source is INADDR_BROADCAST or in_broadcast()
  * reports the destination as a broadcast address), or when
  * badport_bandlim(rstreason) returns a negative value.
  *
  * tlen is only used when the offending segment carried no ACK; it is
  * added to th_seq (plus one if SYN is set) to form the value passed to
  * tcp_respond().
  */
 void
 tcp_dropwithreset(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp,
     int tlen, int rstreason)
 {
 #ifdef INET
 	struct ip *ip;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 
 	/* When a control block is supplied its inpcb must be write-locked. */
 	if (tp != NULL) {
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 	}
 
 	/* Don't bother if destination was broadcast/multicast. */
 	if ((th->th_flags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
 		goto drop;
 #ifdef INET6
 	/* The IP version nibble at the front of the mbuf selects the family. */
 	if (mtod(m, struct ip *)->ip_v == 6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
 		    IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
 			goto drop;
 		/* IPv6 anycast check is done at tcp6_input() */
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		ip = mtod(m, struct ip *);
 		if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
 		    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
 		    ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
 		    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
 			goto drop;
 	}
 #endif
 
 	/* Perform bandwidth limiting. */
 	if (badport_bandlim(rstreason) < 0)
 		goto drop;
 
 	/* tcp_respond consumes the mbuf chain. */
 	if (th->th_flags & TH_ACK) {
 		/* Segment had an ACK: reply with TH_RST built from th_ack. */
 		tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0,
 		    th->th_ack, TH_RST);
 	} else {
 		/* A SYN occupies one unit of sequence space. */
 		if (th->th_flags & TH_SYN)
 			tlen++;
 		tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
 		    (tcp_seq)0, TH_RST|TH_ACK);
 	}
 	return;
 drop:
 	m_freem(m);
 }
 
 /*
  * Parse TCP options and place in tcpopt.
  *
  * cp points at the first option byte and cnt is the number of option
  * bytes available.  to->to_flags is cleared first and then receives a
  * TOF_* bit for every option that was accepted.  Options that are only
  * accepted on SYN segments (MSS, window scale, SACK-permitted, Fast
  * Open) are ignored unless TO_SYN is set in flags; SACK blocks are
  * ignored when TO_SYN is set.  Unknown options and options with an
  * unexpected length are skipped.  Parsing stops at TCPOPT_EOL or at
  * the first option whose length byte is missing, below 2, or larger
  * than the remaining byte count.
  *
  * to_signature, to_sacks and to_tfo_cookie are set to point into the
  * option buffer itself; nothing is copied for them.
  */
 void
 tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags)
 {
 	int opt, optlen;
 
 	to->to_flags = 0;
 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
 		opt = cp[0];
 		if (opt == TCPOPT_EOL)
 			break;
 		if (opt == TCPOPT_NOP)
 			optlen = 1;
 		else {
 			/* Every other option carries a length byte. */
 			if (cnt < 2)
 				break;
 			optlen = cp[1];
 			if (optlen < 2 || optlen > cnt)
 				break;
 		}
 		/*
 		 * Inside the switch, "continue" skips to the next option;
 		 * the for-loop update still advances cp and cnt by optlen.
 		 */
 		switch (opt) {
 		case TCPOPT_MAXSEG:
 			if (optlen != TCPOLEN_MAXSEG)
 				continue;
 			if (!(flags & TO_SYN))
 				continue;
 			to->to_flags |= TOF_MSS;
 			bcopy((char *)cp + 2,
 			    (char *)&to->to_mss, sizeof(to->to_mss));
 			to->to_mss = ntohs(to->to_mss);
 			break;
 		case TCPOPT_WINDOW:
 			if (optlen != TCPOLEN_WINDOW)
 				continue;
 			if (!(flags & TO_SYN))
 				continue;
 			to->to_flags |= TOF_SCALE;
 			/* Clamp the advertised shift to TCP_MAX_WINSHIFT. */
 			to->to_wscale = min(cp[2], TCP_MAX_WINSHIFT);
 			break;
 		case TCPOPT_TIMESTAMP:
 			if (optlen != TCPOLEN_TIMESTAMP)
 				continue;
 			to->to_flags |= TOF_TS;
 			bcopy((char *)cp + 2,
 			    (char *)&to->to_tsval, sizeof(to->to_tsval));
 			to->to_tsval = ntohl(to->to_tsval);
 			bcopy((char *)cp + 6,
 			    (char *)&to->to_tsecr, sizeof(to->to_tsecr));
 			to->to_tsecr = ntohl(to->to_tsecr);
 			break;
 #ifdef TCP_SIGNATURE
 		/*
 		 * XXX In order to reply to a host which has set the
 		 * TCP_SIGNATURE option in its initial SYN, we have to
 		 * record the fact that the option was observed here
 		 * for the syncache code to perform the correct response.
 		 */
 		case TCPOPT_SIGNATURE:
 			if (optlen != TCPOLEN_SIGNATURE)
 				continue;
 			to->to_flags |= TOF_SIGNATURE;
 			to->to_signature = cp + 2;
 			break;
 #endif
 		case TCPOPT_SACK_PERMITTED:
 			if (optlen != TCPOLEN_SACK_PERMITTED)
 				continue;
 			if (!(flags & TO_SYN))
 				continue;
 			if (!V_tcp_do_sack)
 				continue;
 			to->to_flags |= TOF_SACKPERM;
 			break;
 		case TCPOPT_SACK:
 			/* Payload must be a non-empty whole number of blocks. */
 			if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
 				continue;
 			if (flags & TO_SYN)
 				continue;
 			to->to_flags |= TOF_SACK;
 			to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
 			to->to_sacks = cp + 2;
 			TCPSTAT_INC(tcps_sack_rcv_blocks);
 			break;
 #ifdef TCP_RFC7413
 		case TCPOPT_FAST_OPEN:
 			/*
 			 * Accept only the empty form or a length within
 			 * [TCPOLEN_FAST_OPEN_MIN, TCPOLEN_FAST_OPEN_MAX].
 			 * The range test must use "||": a length cannot be
 			 * below the minimum and above the maximum at once,
 			 * so chaining both with "&&" never rejected anything.
 			 */
 			if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) &&
 			    ((optlen < TCPOLEN_FAST_OPEN_MIN) ||
 			    (optlen > TCPOLEN_FAST_OPEN_MAX)))
 				continue;
 			if (!(flags & TO_SYN))
 				continue;
 			if (!V_tcp_fastopen_enabled)
 				continue;
 			to->to_flags |= TOF_FASTOPEN;
 			to->to_tfo_len = optlen - 2;
 			to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
 			break;
 #endif
 		default:
 			continue;
 		}
 	}
 }
 
 /*
  * Pull out of band byte out of a segment so
  * it doesn't appear in the user's data queue.
  * It is still reflected in the segment length for
  * sequencing purposes.
  *
  * off is added to th->th_urp - 1 to obtain the byte's offset from the
  * start of the mbuf chain m.  The byte is saved in tp->t_iobc and
  * TCPOOB_HAVEDATA is set.  Panics if the chain ends before that offset
  * is reached.
  */
 void
 tcp_pulloutofband(struct socket *so, struct tcphdr *th, struct mbuf *m,
     int off)
 {
 	int cnt = off + th->th_urp - 1;
 
 	/* Walk the chain until the mbuf holding offset cnt is found. */
 	while (cnt >= 0) {
 		if (m->m_len > cnt) {
 			char *cp = mtod(m, caddr_t) + cnt;
 			struct tcpcb *tp = sototcpcb(so);
 
 			INP_WLOCK_ASSERT(tp->t_inpcb);
 
 			tp->t_iobc = *cp;
 			tp->t_oobflags |= TCPOOB_HAVEDATA;
 			/* Close the gap: shift the rest of this mbuf down one byte. */
 			bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
 			m->m_len--;
 			/*
 			 * NOTE(review): the packet header length is only
 			 * adjusted when this particular mbuf has M_PKTHDR set;
 			 * confirm that is intended when the byte lives in a
 			 * later mbuf of the chain.
 			 */
 			if (m->m_flags & M_PKTHDR)
 				m->m_pkthdr.len--;
 			return;
 		}
 		cnt -= m->m_len;
 		m = m->m_next;
 		if (m == NULL)
 			break;
 	}
 	panic("tcp_pulloutofband");
 }
 
 /*
  * Collect new round-trip time estimate
  * and update averages and current timeout.
  *
  * rtt is the new sample.  Updates t_srtt, t_rttvar and t_rttbest,
  * recomputes t_rxtcur, and clears t_rtttime, t_rxtshift and
  * t_softerror.  The inpcb write lock must be held.
  */
 void
 tcp_xmit_timer(struct tcpcb *tp, int rtt)
 {
 	int delta;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	TCPSTAT_INC(tcps_rttupdated);
 	tp->t_rttupdated++;
 	if (tp->t_srtt != 0) {
 		/*
 		 * srtt is stored as fixed point with 5 bits after the
 		 * binary point (i.e., scaled by 8).  The following magic
 		 * is equivalent to the smoothing algorithm in rfc793 with
 		 * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
 		 * point).  Adjust rtt to origin 0.
 		 *
 		 * NOTE(review): "5 bits" and "scaled by 8" disagree (5
 		 * fractional bits would be a scale of 32); confirm against
 		 * the definition of TCP_RTT_SHIFT and correct the text.
 		 */
 		delta = ((rtt - 1) << TCP_DELTA_SHIFT)
 			- (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
 
 		/* Never let the smoothed value reach zero or go negative. */
 		if ((tp->t_srtt += delta) <= 0)
 			tp->t_srtt = 1;
 
 		/*
 		 * We accumulate a smoothed rtt variance (actually, a
 		 * smoothed mean difference), then set the retransmit
 		 * timer to smoothed rtt + 4 times the smoothed variance.
 		 * rttvar is stored as fixed point with 4 bits after the
 		 * binary point (scaled by 16).  The following is
 		 * equivalent to rfc793 smoothing with an alpha of .75
 		 * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces
 		 * rfc793's wired-in beta.
 		 */
 		if (delta < 0)
 			delta = -delta;
 		delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
 		if ((tp->t_rttvar += delta) <= 0)
 			tp->t_rttvar = 1;
 		/* Track the smallest srtt + rttvar seen so far. */
 		if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
 		    tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	} else {
 		/*
 		 * No rtt measurement yet - use the unsmoothed rtt.
 		 * Set the variance to half the rtt (so our first
 		 * retransmit happens at 3*rtt).
 		 */
 		tp->t_srtt = rtt << TCP_RTT_SHIFT;
 		tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
 		tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
 	}
 	/* A sample was taken: stop timing and reset the backoff shift. */
 	tp->t_rtttime = 0;
 	tp->t_rxtshift = 0;
 
 	/*
 	 * the retransmit should happen at rtt + 4 * rttvar.
 	 * Because of the way we do the smoothing, srtt and rttvar
 	 * will each average +1/2 tick of bias.  When we compute
 	 * the retransmit timer, we want 1/2 tick of rounding and
 	 * 1 extra tick because of +-1/2 tick uncertainty in the
 	 * firing of the timer.  The bias will give us exactly the
 	 * 1.5 tick we need.  But, because the bias is
 	 * statistical, we have to test that we don't drop below
 	 * the minimum feasible timer (which is 2 ticks).
 	 */
 	TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
 		      max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
 
 	/*
 	 * We received an ack for a packet that wasn't retransmitted;
 	 * it is probably safe to discard any error indications we've
 	 * received recently.  This isn't quite right, but close enough
 	 * for now (a route might have failed after we sent a segment,
 	 * and the return path might not be symmetrical).
 	 */
 	tp->t_softerror = 0;
 }
 
 /*
  * Determine a reasonable value for maxseg size.
  * If the route is known, check route for mtu.
  * If none, use an mss that can be handled on the outgoing interface
  * without forcing IP to fragment.  If no route is found, route has no mtu,
  * or the destination isn't local, use a default, hopefully conservative
  * size (usually 512 or the default IP max size, but no more than the mtu
  * of the interface), as we can't discover anything about intervening
  * gateways or networks.  We also initialize the congestion/slow start
  * window to be a single segment if the destination isn't local.
  * While looking at the routing entry, we also initialize other path-dependent
  * parameters from pre-set or cached values in the routing entry.
  *
- * Also take into account the space needed for options that we
- * send regularly.  Make maxseg shorter by that amount to assure
- * that we can send maxseg amount of data even when the options
- * are present.  Store the upper limit of the length of options plus
- * data in maxopd.
+ * NOTE that resulting t_maxseg doesn't include space for TCP options or
+ * IP options, e.g. IPSEC data, since length of this data may vary, and
+ * thus it is calculated for every segment separately in tcp_output().
  *
  * NOTE that this routine is only called when we process an incoming
  * segment, or an ICMP need fragmentation datagram. Outgoing SYN/ACK MSS
  * settings are handled in tcp_mssopt().
  *
  * offer is the MSS offered by the peer: 0 means the SYN carried no MSS
  * option, -1 means no SYN has been received yet.  mtuoffer is an MTU
  * (or -1 when unused) from which the minimal protocol header size is
  * subtracted to form the offer; offer must be -1 when mtuoffer is
  * given.  If metricptr is not NULL it receives the hostcache metrics
  * (zeroed when the function returns early for lack of a route).  cap
  * is passed through to tcp_maxmtu()/tcp_maxmtu6().
  */
 void
 tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
     struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap)
 {
 	int mss = 0;
 	u_long maxmtu = 0;
 	struct inpcb *inp = tp->t_inpcb;
 	struct hc_metrics_lite metrics;
-	int origoffer;
 #ifdef INET6
 	int isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
 	size_t min_protoh = isipv6 ?
 			    sizeof (struct ip6_hdr) + sizeof (struct tcphdr) :
 			    sizeof (struct tcpiphdr);
 #else
 	const size_t min_protoh = sizeof(struct tcpiphdr);
 #endif
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* An MTU offer is converted to an MSS offer; the two are exclusive. */
 	if (mtuoffer != -1) {
 		KASSERT(offer == -1, ("%s: conflict", __func__));
 		offer = mtuoffer - min_protoh;
 	}
-	origoffer = offer;
 
 	/* Initialize. */
 #ifdef INET6
 	if (isipv6) {
 		maxmtu = tcp_maxmtu6(&inp->inp_inc, cap);
-		tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
+		tp->t_maxseg = V_tcp_v6mssdflt;
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		maxmtu = tcp_maxmtu(&inp->inp_inc, cap);
-		tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
+		tp->t_maxseg = V_tcp_mssdflt;
 	}
 #endif
 
 	/*
 	 * No route to sender, stay with default mss and return.
 	 */
 	if (maxmtu == 0) {
 		/*
 		 * In case we return early we need to initialize metrics
 		 * to a defined state as tcp_hc_get() would do for us
 		 * if there was no cache hit.
 		 */
 		if (metricptr != NULL)
 			bzero(metricptr, sizeof(struct hc_metrics_lite));
 		return;
 	}
 
 	/* What have we got? */
 	switch (offer) {
 		case 0:
 			/*
 			 * Offer == 0 means that there was no MSS on the SYN
 			 * segment, in this case we use tcp_mssdflt as
-			 * already assigned to t_maxopd above.
+			 * already assigned to t_maxseg above.
 			 */
-			offer = tp->t_maxopd;
+			offer = tp->t_maxseg;
 			break;
 
 		case -1:
 			/*
 			 * Offer == -1 means that we didn't receive SYN yet.
 			 */
 			/* FALLTHROUGH */
 
 		default:
 			/*
 			 * Prevent DoS attack with too small MSS. Round up
 			 * to at least minmss.
 			 */
 			offer = max(offer, V_tcp_minmss);
 	}
 
 	/*
 	 * rmx information is now retrieved from tcp_hostcache.
 	 */
 	tcp_hc_get(&inp->inp_inc, &metrics);
 	if (metricptr != NULL)
 		bcopy(&metrics, metricptr, sizeof(struct hc_metrics_lite));
 
 	/*
 	 * If there's a discovered mtu in tcp hostcache, use it.
 	 * Else, use the link mtu.
 	 */
 	if (metrics.rmx_mtu)
 		mss = min(metrics.rmx_mtu, maxmtu) - min_protoh;
 	else {
 #ifdef INET6
 		if (isipv6) {
 			mss = maxmtu - min_protoh;
 			if (!V_path_mtu_discovery &&
 			    !in6_localaddr(&inp->in6p_faddr))
 				mss = min(mss, V_tcp_v6mssdflt);
 		}
 #endif
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 		{
 			mss = maxmtu - min_protoh;
 			if (!V_path_mtu_discovery &&
 			    !in_localaddr(inp->inp_faddr))
 				mss = min(mss, V_tcp_mssdflt);
 		}
 #endif
 		/*
 		 * XXX - The above conditional (mss = maxmtu - min_protoh)
 		 * probably violates the TCP spec.
 		 * The problem is that, since we don't know the
 		 * other end's MSS, we are supposed to use a conservative
 		 * default.  But, if we do that, then MTU discovery will
 		 * never actually take place, because the conservative
 		 * default is much less than the MTUs typically seen
 		 * on the Internet today.  For the moment, we'll sweep
 		 * this under the carpet.
 		 *
 		 * The conservative default might not actually be a problem
 		 * if the only case this occurs is when sending an initial
 		 * SYN with options and data to a host we've never talked
 		 * to before.  Then, they will reply with an MSS value which
 		 * will get recorded and the new parameters should get
 		 * recomputed.  For Further Study.
 		 */
 	}
 	/* Never exceed what the peer (or the MTU offer) allows. */
 	mss = min(mss, offer);
 
 	/*
-	 * Sanity check: make sure that maxopd will be large
+	 * Sanity check: make sure that maxseg will be large
 	 * enough to allow some data on segments even if the
 	 * all the option space is used (40bytes).  Otherwise
 	 * funny things may happen in tcp_output.
+	 *
+	 * XXXGL: shouldn't we reserve space for IP/IPv6 options?
 	 */
 	mss = max(mss, 64);
 
-	/*
-	 * maxopd stores the maximum length of data AND options
-	 * in a segment; maxseg is the amount of data in a normal
-	 * segment.  We need to store this value (maxopd) apart
-	 * from maxseg, because now every segment carries options
-	 * and thus we normally have somewhat less data in segments.
-	 */
-	tp->t_maxopd = mss;
-
-	/*
-	 * origoffer==-1 indicates that no segments were received yet.
-	 * In this case we just guess.
-	 */
-	if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
-	    (origoffer == -1 ||
-	     (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
-		mss -= TCPOLEN_TSTAMP_APPA;
-
 	tp->t_maxseg = mss;
 }
 
 /*
  * Set tp->t_maxseg from the peer's MSS offer and size the socket
  * buffers to match.
  *
  * Calls tcp_mss_update() with no MTU offer, then limits the mss to the
  * send buffer size, rounds the send and receive buffer reservations up
  * to a multiple of the mss (capped at sb_max, and only ever growing
  * them), and records the TSO parameters reported in the interface
  * capabilities when CSUM_TSO is set.
  */
 void
 tcp_mss(struct tcpcb *tp, int offer)
 {
 	int mss;
 	u_long bufsize;
 	struct inpcb *inp;
 	struct socket *so;
 	struct hc_metrics_lite metrics;
 	struct tcp_ifcap cap;
 
 	KASSERT(tp != NULL, ("%s: tp == NULL", __func__));
 
 	/* cap is filled in by tcp_mss_update() via the maxmtu lookup. */
 	bzero(&cap, sizeof(cap));
 	tcp_mss_update(tp, offer, -1, &metrics, &cap);
 
 	mss = tp->t_maxseg;
 	inp = tp->t_inpcb;
 
 	/*
 	 * If there's a pipesize, change the socket buffer to that size,
 	 * don't change if sb_hiwat is different than default (then it
 	 * has been changed on purpose with setsockopt).
 	 * Make the socket buffers an integral number of mss units;
 	 * if the mss is larger than the socket buffer, decrease the mss.
 	 */
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_snd);
 	if ((so->so_snd.sb_hiwat == V_tcp_sendspace) && metrics.rmx_sendpipe)
 		bufsize = metrics.rmx_sendpipe;
 	else
 		bufsize = so->so_snd.sb_hiwat;
 	if (bufsize < mss)
 		mss = bufsize;
 	else {
 		bufsize = roundup(bufsize, mss);
 		if (bufsize > sb_max)
 			bufsize = sb_max;
 		if (bufsize > so->so_snd.sb_hiwat)
 			(void)sbreserve_locked(&so->so_snd, bufsize, so, NULL);
 	}
 	SOCKBUF_UNLOCK(&so->so_snd);
 	/* Store the mss, possibly reduced to the send buffer size above. */
 	tp->t_maxseg = mss;
 
 	/* Same rounding for the receive buffer; the mss is not changed here. */
 	SOCKBUF_LOCK(&so->so_rcv);
 	if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe)
 		bufsize = metrics.rmx_recvpipe;
 	else
 		bufsize = so->so_rcv.sb_hiwat;
 	if (bufsize > mss) {
 		bufsize = roundup(bufsize, mss);
 		if (bufsize > sb_max)
 			bufsize = sb_max;
 		if (bufsize > so->so_rcv.sb_hiwat)
 			(void)sbreserve_locked(&so->so_rcv, bufsize, so, NULL);
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/* Check the interface for TSO capabilities. */
 	if (cap.ifcap & CSUM_TSO) {
 		tp->t_flags |= TF_TSO;
 		tp->t_tsomax = cap.tsomax;
 		tp->t_tsomaxsegcount = cap.tsomaxsegcount;
 		tp->t_tsomaxsegsize = cap.tsomaxsegsize;
 	}
 }
 
 /*
  * Determine the MSS option to send on an outgoing SYN.
  *
  * Starts from the per-family default MSS.  If an interface/route MTU
  * (tcp_maxmtu()/tcp_maxmtu6()) and/or a hostcache MTU
  * (tcp_hc_getmtu()) is known, the result is instead that MTU minus the
  * minimal protocol header size; when both are known the smaller one
  * is used.
  */
 int
 tcp_mssopt(struct in_conninfo *inc)
 {
 	int mss = 0;
 	u_long maxmtu = 0;
 	u_long thcmtu = 0;
 	size_t min_protoh;
 
 	KASSERT(inc != NULL, ("tcp_mssopt with NULL in_conninfo pointer"));
 
 #ifdef INET6
 	if (inc->inc_flags & INC_ISIPV6) {
 		mss = V_tcp_v6mssdflt;
 		maxmtu = tcp_maxmtu6(inc, NULL);
 		min_protoh = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		mss = V_tcp_mssdflt;
 		maxmtu = tcp_maxmtu(inc, NULL);
 		min_protoh = sizeof(struct tcpiphdr);
 	}
 #endif
 #if defined(INET6) || defined(INET)
 	thcmtu = tcp_hc_getmtu(inc); /* IPv4 and IPv6 */
 #endif
 
 	if (maxmtu && thcmtu)
 		mss = min(maxmtu, thcmtu) - min_protoh;
 	else if (maxmtu || thcmtu)
 		/* Exactly one is non-zero here, so max() selects it. */
 		mss = max(maxmtu, thcmtu) - min_protoh;
 
 	return (mss);
 }
 
 
 /*
  * When a partial ack arrives, force the retransmission of the
  * next unacknowledged segment.  Do not clear tp->t_dupacks.
  * By setting snd_nxt to th_ack, this forces retransmission timer to
  * be started again.
  *
  * snd_cwnd is temporarily set so that one segment beyond the
  * acknowledged data can be sent, then restored and deflated by the
  * number of bytes this ack covers (plus one segment).
  */
 void
 tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th)
 {
 	tcp_seq onxt = tp->snd_nxt;
-	u_long  ocwnd = tp->snd_cwnd;
+	u_long ocwnd = tp->snd_cwnd;
+	u_int maxseg = tcp_maxseg(tp);
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* Stop the retransmit timer and any RTT measurement in progress. */
 	tcp_timer_activate(tp, TT_REXMT, 0);
 	tp->t_rtttime = 0;
 	tp->snd_nxt = th->th_ack;
 	/*
 	 * Set snd_cwnd to one segment beyond acknowledged offset.
 	 * (tp->snd_una has not yet been updated when this function is called.)
 	 */
-	tp->snd_cwnd = tp->t_maxseg + BYTES_THIS_ACK(tp, th);
+	tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
 	tp->t_flags |= TF_ACKNOW;
 	(void) tp->t_fb->tfb_tcp_output(tp);
 	/* Restore the window and, if output moved it backwards, snd_nxt. */
 	tp->snd_cwnd = ocwnd;
 	if (SEQ_GT(onxt, tp->snd_nxt))
 		tp->snd_nxt = onxt;
 	/*
 	 * Partial window deflation.  Relies on fact that tp->snd_una
 	 * not updated yet.
 	 */
 	if (tp->snd_cwnd > BYTES_THIS_ACK(tp, th))
 		tp->snd_cwnd -= BYTES_THIS_ACK(tp, th);
 	else
 		tp->snd_cwnd = 0;
-	tp->snd_cwnd += tp->t_maxseg;
+	tp->snd_cwnd += maxseg;
 }
 
 /*
  * Return snd_max - snd_una, plus the bytes recorded in
  * sackhint.sack_bytes_rexmit, minus the bytes recorded in
  * sackhint.sacked_bytes.
  */
 int
 tcp_compute_pipe(struct tcpcb *tp)
 {
 	return (tp->snd_max - tp->snd_una +
 		tp->sackhint.sack_bytes_rexmit -
 		tp->sackhint.sacked_bytes);
 }
Index: projects/release-pkg/sys/netinet/tcp_output.c
===================================================================
--- projects/release-pkg/sys/netinet/tcp_output.c	(revision 293335)
+++ projects/release-pkg/sys/netinet/tcp_output.c	(revision 293336)
@@ -1,1807 +1,1807 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_output.c	8.4 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/domain.h>
 #include <sys/hhook.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef INET6
 #include <netinet6/in6_pcb.h>
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
 #ifdef TCP_RFC7413
 #include <netinet/tcp_fastopen.h>
 #endif
 #define	TCPOUTFLAGS
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcpip.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * Per-VNET tunables for the TCP output path.  Each one is defined with
  * its default value and exported read/write under the _net_inet_tcp
  * sysctl node.
  */
 
 /*
  * Path MTU discovery switch; enabled (1) by default.
  * NOTE(review): this is the only SYSCTL_INT in this group that passes 1
  * rather than 0 as the argument following the variable pointer --
  * confirm whether that is intentional.
  */
 VNET_DEFINE(int, path_mtu_discovery) = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(path_mtu_discovery), 1,
 	"Enable Path MTU Discovery");
 
 /* TCP Segmentation Offload switch; enabled (1) by default. */
 VNET_DEFINE(int, tcp_do_tso) = 1;
 #define	V_tcp_do_tso		VNET(tcp_do_tso)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_do_tso), 0,
 	"Enable TCP Segmentation Offload");
 
 /*
  * Initial send socket buffer size; 32 KiB by default.  Registered with
  * the fixed identifier TCPCTL_SENDSPACE instead of OID_AUTO.
  */
 VNET_DEFINE(int, tcp_sendspace) = 1024*32;
 #define	V_tcp_sendspace	VNET(tcp_sendspace)
 SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_sendspace), 0, "Initial send socket buffer size");
 
 /* Automatic send buffer sizing switch; enabled (1) by default. */
 VNET_DEFINE(int, tcp_do_autosndbuf) = 1;
 #define	V_tcp_do_autosndbuf	VNET(tcp_do_autosndbuf)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_do_autosndbuf), 0,
 	"Enable automatic send buffer sizing");
 
 /* Step by which the automatic send buffer grows; 8 KiB by default. */
 VNET_DEFINE(int, tcp_autosndbuf_inc) = 8*1024;
 #define	V_tcp_autosndbuf_inc	VNET(tcp_autosndbuf_inc)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_autosndbuf_inc), 0,
 	"Incrementor step size of automatic send buffer");
 
 /* Upper bound for the automatic send buffer; 2 MiB by default. */
 VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024;
 #define	V_tcp_autosndbuf_max	VNET(tcp_autosndbuf_max)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
 	&VNET_NAME(tcp_autosndbuf_max), 0,
 	"Max size of automatic send buffer");
 
 /* Forward declarations of the file-local inline helpers defined below. */
 static void inline	hhook_run_tcp_est_out(struct tcpcb *tp,
 			    struct tcphdr *th, struct tcpopt *to,
 			    long len, int tso);
 static void inline	cc_after_idle(struct tcpcb *tp);
 
 /*
  * Wrapper for the TCP established output helper hook.
  */
 static void inline
 hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th,
     struct tcpopt *to, long len, int tso)
 {
 	struct tcp_hhook_data hhook_data;
 
 	if (V_tcp_hhh[HHOOK_TCP_EST_OUT]->hhh_nhooks > 0) {
 		hhook_data.tp = tp;
 		hhook_data.th = th;
 		hhook_data.to = to;
 		hhook_data.len = len;
 		hhook_data.tso = tso;
 
 		hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_OUT], &hhook_data,
 		    tp->osd);
 	}
 }
 
 /*
  * Congestion control wrapper: invoke the after_idle callback of the
  * connection's congestion control algorithm, if the algorithm provides
  * one.  The inpcb write lock is asserted.
  */
 static void inline
 cc_after_idle(struct tcpcb *tp)
 {
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* The callback is optional. */
 	if (CC_ALGO(tp)->after_idle == NULL)
 		return;
 
 	CC_ALGO(tp)->after_idle(tp->ccv);
 }
 
 /*
  * Tcp output routine: figure out what should be sent and send it.
  */
 int
 tcp_output(struct tcpcb *tp)
 {
 	struct socket *so = tp->t_inpcb->inp_socket;
 	long len, recwin, sendwin;
 	int off, flags, error = 0;	/* Keep compiler happy */
 	struct mbuf *m;
 	struct ip *ip = NULL;
 	struct ipovly *ipov = NULL;
 	struct tcphdr *th;
 	u_char opt[TCP_MAXOLEN];
 	unsigned ipoptlen, optlen, hdrlen;
 #ifdef IPSEC
 	unsigned ipsec_optlen = 0;
 #endif
 	int idle, sendalot;
 	int sack_rxmit, sack_bytes_rxmt;
 	struct sackhole *p;
 	int tso, mtu;
 	struct tcpopt to;
 #if 0
 	int maxburst = TCP_MAXBURST;
 #endif
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 	int isipv6;
 
 	isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0;
 #endif
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE)
 		return (tcp_offload_output(tp));
 #endif
 
 #ifdef TCP_RFC7413
 	/*
 	 * For TFO connections in SYN_RECEIVED, only allow the initial
 	 * SYN|ACK and those sent by the retransmit timer.
 	 */
 	if ((tp->t_flags & TF_FASTOPEN) &&
 	    (tp->t_state == TCPS_SYN_RECEIVED) &&
 	    SEQ_GT(tp->snd_max, tp->snd_una) &&    /* inital SYN|ACK sent */
 	    (tp->snd_nxt != tp->snd_una))          /* not a retransmit */
 		return (0);
 #endif
 	/*
 	 * Determine length of data that should be transmitted,
 	 * and flags that will be used.
 	 * If there is some data or critical controls (SYN, RST)
 	 * to send, then transmit; otherwise, investigate further.
 	 */
 	idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
 	if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur)
 		cc_after_idle(tp);
 	tp->t_flags &= ~TF_LASTIDLE;
 	if (idle) {
 		if (tp->t_flags & TF_MORETOCOME) {
 			tp->t_flags |= TF_LASTIDLE;
 			idle = 0;
 		}
 	}
 again:
 	/*
 	 * If we've recently taken a timeout, snd_max will be greater than
 	 * snd_nxt.  There may be SACK information that allows us to avoid
 	 * resending already delivered data.  Adjust snd_nxt accordingly.
 	 */
 	if ((tp->t_flags & TF_SACK_PERMIT) &&
 	    SEQ_LT(tp->snd_nxt, tp->snd_max))
 		tcp_sack_adjust(tp);
 	sendalot = 0;
 	tso = 0;
 	mtu = 0;
 	off = tp->snd_nxt - tp->snd_una;
 	sendwin = min(tp->snd_wnd, tp->snd_cwnd);
 
 	flags = tcp_outflags[tp->t_state];
 	/*
 	 * Send any SACK-generated retransmissions.  If we're explicitly trying
 	 * to send out new data (when sendalot is 1), bypass this function.
 	 * If we retransmit in fast recovery mode, decrement snd_cwnd, since
 	 * we're replacing a (future) new transmission with a retransmission
 	 * now, and we previously incremented snd_cwnd in tcp_input().
 	 */
 	/*
 	 * Still in sack recovery , reset rxmit flag to zero.
 	 */
 	sack_rxmit = 0;
 	sack_bytes_rxmt = 0;
 	len = 0;
 	p = NULL;
 	if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) &&
 	    (p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
 		long cwin;
 		
 		cwin = min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt;
 		if (cwin < 0)
 			cwin = 0;
 		/* Do not retransmit SACK segments beyond snd_recover */
 		if (SEQ_GT(p->end, tp->snd_recover)) {
 			/*
 			 * (At least) part of sack hole extends beyond
 			 * snd_recover. Check to see if we can rexmit data
 			 * for this hole.
 			 */
 			if (SEQ_GEQ(p->rxmit, tp->snd_recover)) {
 				/*
 				 * Can't rexmit any more data for this hole.
 				 * That data will be rexmitted in the next
 				 * sack recovery episode, when snd_recover
 				 * moves past p->rxmit.
 				 */
 				p = NULL;
 				goto after_sack_rexmit;
 			} else
 				/* Can rexmit part of the current hole */
 				len = ((long)ulmin(cwin,
 						   tp->snd_recover - p->rxmit));
 		} else
 			len = ((long)ulmin(cwin, p->end - p->rxmit));
 		off = p->rxmit - tp->snd_una;
 		KASSERT(off >= 0,("%s: sack block to the left of una : %d",
 		    __func__, off));
 		if (len > 0) {
 			sack_rxmit = 1;
 			sendalot = 1;
 			TCPSTAT_INC(tcps_sack_rexmits);
 			TCPSTAT_ADD(tcps_sack_rexmit_bytes,
 			    min(len, tp->t_maxseg));
 		}
 	}
 after_sack_rexmit:
 	/*
 	 * Get standard flags, and add SYN or FIN if requested by 'hidden'
 	 * state flags.
 	 */
 	if (tp->t_flags & TF_NEEDFIN)
 		flags |= TH_FIN;
 	if (tp->t_flags & TF_NEEDSYN)
 		flags |= TH_SYN;
 
 	SOCKBUF_LOCK(&so->so_snd);
 	/*
 	 * If in persist timeout with window of 0, send 1 byte.
 	 * Otherwise, if window is small but nonzero
 	 * and timer expired, we will send what we can
 	 * and go to transmit state.
 	 */
 	if (tp->t_flags & TF_FORCEDATA) {
 		if (sendwin == 0) {
 			/*
 			 * If we still have some data to send, then
 			 * clear the FIN bit.  Usually this would
 			 * happen below when it realizes that we
 			 * aren't sending all the data.  However,
 			 * if we have exactly 1 byte of unsent data,
 			 * then it won't clear the FIN bit below,
 			 * and if we are in persist state, we wind
 			 * up sending the packet without recording
 			 * that we sent the FIN bit.
 			 *
 			 * We can't just blindly clear the FIN bit,
 			 * because if we don't have any more data
 			 * to send then the probe will be the FIN
 			 * itself.
 			 */
 			if (off < sbused(&so->so_snd))
 				flags &= ~TH_FIN;
 			sendwin = 1;
 		} else {
 			tcp_timer_activate(tp, TT_PERSIST, 0);
 			tp->t_rxtshift = 0;
 		}
 	}
 
 	/*
 	 * If snd_nxt == snd_max and we have transmitted a FIN, the
 	 * offset will be > 0 even if so_snd.sb_cc is 0, resulting in
 	 * a negative length.  This can also occur when TCP opens up
 	 * its congestion window while receiving additional duplicate
 	 * acks after fast-retransmit because TCP will reset snd_nxt
 	 * to snd_max after the fast-retransmit.
 	 *
 	 * In the normal retransmit-FIN-only case, however, snd_nxt will
 	 * be set to snd_una, the offset will be 0, and the length may
 	 * wind up 0.
 	 *
 	 * If sack_rxmit is true we are retransmitting from the scoreboard
 	 * in which case len is already set.
 	 */
 	if (sack_rxmit == 0) {
 		if (sack_bytes_rxmt == 0)
 			len = ((long)ulmin(sbavail(&so->so_snd), sendwin) -
 			    off);
 		else {
 			long cwin;
 
                         /*
 			 * We are inside of a SACK recovery episode and are
 			 * sending new data, having retransmitted all the
 			 * data possible in the scoreboard.
 			 */
 			len = ((long)ulmin(sbavail(&so->so_snd), tp->snd_wnd) -
 			    off);
 			/*
 			 * Don't remove this (len > 0) check !
 			 * We explicitly check for len > 0 here (although it 
 			 * isn't really necessary), to work around a gcc 
 			 * optimization issue - to force gcc to compute
 			 * len above. Without this check, the computation
 			 * of len is bungled by the optimizer.
 			 */
 			if (len > 0) {
 				cwin = tp->snd_cwnd - 
 					(tp->snd_nxt - tp->sack_newdata) -
 					sack_bytes_rxmt;
 				if (cwin < 0)
 					cwin = 0;
 				len = lmin(len, cwin);
 			}
 		}
 	}
 
 	/*
 	 * Lop off SYN bit if it has already been sent.  However, if this
 	 * is SYN-SENT state and if segment contains data and if we don't
 	 * know that foreign host supports TAO, suppress sending segment.
 	 */
 	if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
 		if (tp->t_state != TCPS_SYN_RECEIVED)
 			flags &= ~TH_SYN;
 #ifdef TCP_RFC7413
 		/*
 		 * When sending additional segments following a TFO SYN|ACK,
 		 * do not include the SYN bit.
 		 */
 		if ((tp->t_flags & TF_FASTOPEN) &&
 		    (tp->t_state == TCPS_SYN_RECEIVED))
 			flags &= ~TH_SYN;
 #endif
 		off--, len++;
 	}
 
 	/*
 	 * Be careful not to send data and/or FIN on SYN segments.
 	 * This measure is needed to prevent interoperability problems
 	 * with not fully conformant TCP implementations.
 	 */
 	if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) {
 		len = 0;
 		flags &= ~TH_FIN;
 	}
 
 #ifdef TCP_RFC7413
 	/*
 	 * When retransmitting SYN|ACK on a passively-created TFO socket,
 	 * don't include data, as the presence of data may have caused the
 	 * original SYN|ACK to have been dropped by a middlebox.
 	 */
 	if ((tp->t_flags & TF_FASTOPEN) &&
 	    (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) ||
 	     (flags & TH_RST)))
 		len = 0;
 #endif
 	if (len <= 0) {
 		/*
 		 * If FIN has been sent but not acked,
 		 * but we haven't been called to retransmit,
 		 * len will be < 0.  Otherwise, window shrank
 		 * after we sent into it.  If window shrank to 0,
 		 * cancel pending retransmit, pull snd_nxt back
 		 * to (closed) window, and set the persist timer
 		 * if it isn't already going.  If the window didn't
 		 * close completely, just wait for an ACK.
 		 *
 		 * We also do a general check here to ensure that
 		 * we will set the persist timer when we have data
 		 * to send, but a 0-byte window. This makes sure
 		 * the persist timer is set even if the packet
 		 * hits one of the "goto send" lines below.
 		 */
 		len = 0;
 		if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
 			(off < (int) sbavail(&so->so_snd))) {
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			tp->t_rxtshift = 0;
 			tp->snd_nxt = tp->snd_una;
 			if (!tcp_timer_active(tp, TT_PERSIST))
 				tcp_setpersist(tp);
 		}
 	}
 
 	/* len will be >= 0 after this point. */
 	KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
 
 	/*
 	 * Automatic sizing of send socket buffer.  Often the send buffer
 	 * size is not optimally adjusted to the actual network conditions
 	 * at hand (delay bandwidth product).  Setting the buffer size too
 	 * small limits throughput on links with high bandwidth and high
 	 * delay (eg. trans-continental/oceanic links).  Setting the
 	 * buffer size too big consumes too much real kernel memory,
 	 * especially with many connections on busy servers.
 	 *
 	 * The criteria to step up the send buffer one notch are:
 	 *  1. receive window of remote host is larger than send buffer
 	 *     (with a fudge factor of 5/4th);
 	 *  2. send buffer is filled to 7/8th with data (so we actually
 	 *     have data to make use of it);
 	 *  3. send buffer fill has not hit maximal automatic size;
 	 *  4. our send window (slow start and congestion controlled) is
 	 *     larger than sent but unacknowledged data in send buffer.
 	 *
 	 * The remote host receive window scaling factor may limit the
 	 * growing of the send buffer before it reaches its allowed
 	 * maximum.
 	 *
 	 * It scales directly with slow start or congestion window
 	 * and does at most one step per received ACK.  This fast
 	 * scaling has the drawback of growing the send buffer beyond
 	 * what is strictly necessary to make full use of a given
 	 * delay*bandwidth product.  However testing has shown this not
 	 * to be much of a problem.  At worst we are trading wasting
 	 * of available bandwidth (the non-use of it) for wasting some
 	 * socket buffer memory.
 	 *
 	 * TODO: Shrink send buffer during idle periods together
 	 * with congestion window.  Requires another timer.  Has to
 	 * wait for upcoming tcp timer rewrite.
 	 *
 	 * XXXGL: should there be used sbused() or sbavail()?
 	 */
 	if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
 		if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
 		    sbused(&so->so_snd) >= (so->so_snd.sb_hiwat / 8 * 7) &&
 		    sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
 		    sendwin >= (sbused(&so->so_snd) -
 		    (tp->snd_nxt - tp->snd_una))) {
 			if (!sbreserve_locked(&so->so_snd,
 			    min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
 			     V_tcp_autosndbuf_max), so, curthread))
 				so->so_snd.sb_flags &= ~SB_AUTOSIZE;
 		}
 	}
 
 	/*
 	 * Decide if we can use TCP Segmentation Offloading (if supported by
 	 * hardware).
 	 *
 	 * TSO may only be used if we are in a pure bulk sending state.  The
 	 * presence of TCP-MD5, SACK retransmits, SACK advertizements and
 	 * IP options prevent using TSO.  With TSO the TCP header is the same
 	 * (except for the sequence number) for all generated packets.  This
 	 * makes it impossible to transmit any options which vary per generated
 	 * segment or packet.
 	 */
 #ifdef IPSEC
 	/*
 	 * Pre-calculate here as we save another lookup into the darknesses
 	 * of IPsec that way and can actually decide if TSO is ok.
 	 */
 	ipsec_optlen = ipsec_hdrsiz_tcp(tp);
 #endif
 	if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
 	    ((tp->t_flags & TF_SIGNATURE) == 0) &&
 	    tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
 #ifdef IPSEC
 	    ipsec_optlen == 0 &&
 #endif
 	    tp->t_inpcb->inp_options == NULL &&
 	    tp->t_inpcb->in6p_options == NULL)
 		tso = 1;
 
 	if (sack_rxmit) {
 		if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd)))
 			flags &= ~TH_FIN;
 	} else {
 		if (SEQ_LT(tp->snd_nxt + len, tp->snd_una +
 		    sbused(&so->so_snd)))
 			flags &= ~TH_FIN;
 	}
 
 	recwin = sbspace(&so->so_rcv);
 
 	/*
 	 * Sender silly window avoidance.   We transmit under the following
 	 * conditions when len is non-zero:
 	 *
 	 *	- We have a full segment (or more with TSO)
 	 *	- This is the last buffer in a write()/send() and we are
 	 *	  either idle or running NODELAY
 	 *	- we've timed out (e.g. persist timer)
 	 *	- we have more than 1/2 the maximum send window's worth of
 	 *	  data (receiver may be limiting the window size)
 	 *	- we need to retransmit
 	 */
 	if (len) {
 		if (len >= tp->t_maxseg)
 			goto send;
 		/*
 		 * NOTE! on localhost connections an 'ack' from the remote
 		 * end may occur synchronously with the output and cause
 		 * us to flush a buffer queued with moretocome.  XXX
 		 *
 		 * note: the len + off check is almost certainly unnecessary.
 		 */
 		if (!(tp->t_flags & TF_MORETOCOME) &&	/* normal case */
 		    (idle || (tp->t_flags & TF_NODELAY)) &&
 		    len + off >= sbavail(&so->so_snd) &&
 		    (tp->t_flags & TF_NOPUSH) == 0) {
 			goto send;
 		}
 		if (tp->t_flags & TF_FORCEDATA)		/* typ. timeout case */
 			goto send;
 		if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
 			goto send;
 		if (SEQ_LT(tp->snd_nxt, tp->snd_max))	/* retransmit case */
 			goto send;
 		if (sack_rxmit)
 			goto send;
 	}
 
 	/*
 	 * Sending of standalone window updates.
 	 *
 	 * Window updates are important when we close our window due to a
 	 * full socket buffer and are opening it again after the application
 	 * reads data from it.  Once the window has opened again and the
 	 * remote end starts to send again the ACK clock takes over and
 	 * provides the most current window information.
 	 *
 	 * We must avoid the silly window syndrome whereas every read
 	 * from the receive buffer, no matter how small, causes a window
 	 * update to be sent.  We also should avoid sending a flurry of
 	 * window updates when the socket buffer had queued a lot of data
 	 * and the application is doing small reads.
 	 *
 	 * Prevent a flurry of pointless window updates by only sending
 	 * an update when we can increase the advertized window by more
 	 * than 1/4th of the socket buffer capacity.  When the buffer is
 	 * getting full or is very small be more aggressive and send an
 	 * update whenever we can increase by two mss sized segments.
 	 * In all other situations the ACK's to new incoming data will
 	 * carry further window increases.
 	 *
 	 * Don't send an independent window update if a delayed
 	 * ACK is pending (it will get piggy-backed on it) or the
 	 * remote side already has done a half-close and won't send
 	 * more data.  Skip this if the connection is in T/TCP
 	 * half-open state.
 	 */
 	if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
 	    !(tp->t_flags & TF_DELACK) &&
 	    !TCPS_HAVERCVDFIN(tp->t_state)) {
 		/*
 		 * "adv" is the amount we could increase the window,
 		 * taking into account that we are limited by
 		 * TCP_MAXWIN << tp->rcv_scale.
 		 */
 		long adv;
 		int oldwin;
 
 		adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale);
 		if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) {
 			oldwin = (tp->rcv_adv - tp->rcv_nxt);
 			adv -= oldwin;
 		} else
 			oldwin = 0;
 
 		/* 
 		 * If the new window size ends up being the same as the old
 		 * size when it is scaled, then don't force a window update.
 		 */
 		if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale)
 			goto dontupdate;
 
 		if (adv >= (long)(2 * tp->t_maxseg) &&
 		    (adv >= (long)(so->so_rcv.sb_hiwat / 4) ||
 		     recwin <= (long)(so->so_rcv.sb_hiwat / 8) ||
 		     so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg))
 			goto send;
 	}
 dontupdate:
 
 	/*
 	 * Send if we owe the peer an ACK, RST, SYN, or urgent data.  ACKNOW
 	 * is also a catch-all for the retransmit timer timeout case.
 	 */
 	if (tp->t_flags & TF_ACKNOW)
 		goto send;
 	if ((flags & TH_RST) ||
 	    ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
 		goto send;
 	if (SEQ_GT(tp->snd_up, tp->snd_una))
 		goto send;
 	/*
 	 * If our state indicates that FIN should be sent
 	 * and we have not yet done so, then we need to send.
 	 */
 	if (flags & TH_FIN &&
 	    ((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
 		goto send;
 	/*
 	 * In SACK, it is possible for tcp_output to fail to send a segment
 	 * after the retransmission timer has been turned off.  Make sure
 	 * that the retransmission timer is set.
 	 */
 	if ((tp->t_flags & TF_SACK_PERMIT) &&
 	    SEQ_GT(tp->snd_max, tp->snd_una) &&
 	    !tcp_timer_active(tp, TT_REXMT) &&
 	    !tcp_timer_active(tp, TT_PERSIST)) {
 		tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 		goto just_return;
 	} 
 	/*
 	 * TCP window updates are not reliable, rather a polling protocol
 	 * using ``persist'' packets is used to insure receipt of window
 	 * updates.  The three ``states'' for the output side are:
 	 *	idle			not doing retransmits or persists
 	 *	persisting		to move a small or zero window
 	 *	(re)transmitting	and thereby not persisting
 	 *
 	 * tcp_timer_active(tp, TT_PERSIST)
 	 *	is true when we are in persist state.
 	 * (tp->t_flags & TF_FORCEDATA)
 	 *	is set when we are called to send a persist packet.
 	 * tcp_timer_active(tp, TT_REXMT)
 	 *	is set when we are retransmitting
 	 * The output side is idle when both timers are zero.
 	 *
 	 * If send window is too small, there is data to transmit, and no
 	 * retransmit or persist is pending, then go to persist state.
 	 * If nothing happens soon, send when timer expires:
 	 * if window is nonzero, transmit what we can,
 	 * otherwise force out a byte.
 	 */
 	if (sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) &&
 	    !tcp_timer_active(tp, TT_PERSIST)) {
 		tp->t_rxtshift = 0;
 		tcp_setpersist(tp);
 	}
 
 	/*
 	 * No reason to send a segment, just return.
 	 */
 just_return:
 	SOCKBUF_UNLOCK(&so->so_snd);
 	return (0);
 
 send:
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 	if (len > 0) {
 		if (len >= tp->t_maxseg)
 			tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
 		else
 			tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
 	}
 	/*
 	 * Before ESTABLISHED, force sending of initial options
 	 * unless TCP set not to do any options.
 	 * NOTE: we assume that the IP/TCP header plus TCP options
 	 * always fit in a single mbuf, leaving room for a maximum
 	 * link header, i.e.
 	 *	max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES
 	 */
 	optlen = 0;
 #ifdef INET6
 	if (isipv6)
 		hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
 	else
 #endif
 		hdrlen = sizeof (struct tcpiphdr);
 
 	/*
 	 * Compute options for segment.
 	 * We only have to care about SYN and established connection
 	 * segments.  Options for SYN-ACK segments are handled in TCP
 	 * syncache.
 	 */
 	if ((tp->t_flags & TF_NOOPT) == 0) {
 		to.to_flags = 0;
 		/* Maximum segment size. */
 		if (flags & TH_SYN) {
 			tp->snd_nxt = tp->iss;
 			to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
 			to.to_flags |= TOF_MSS;
 #ifdef TCP_RFC7413
 			/*
 			 * Only include the TFO option on the first
 			 * transmission of the SYN|ACK on a
 			 * passively-created TFO socket, as the presence of
 			 * the TFO option may have caused the original
 			 * SYN|ACK to have been dropped by a middlebox.
 			 */
 			if ((tp->t_flags & TF_FASTOPEN) &&
 			    (tp->t_state == TCPS_SYN_RECEIVED) &&
 			    (tp->t_rxtshift == 0)) {
 				to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
 				to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie;
 				to.to_flags |= TOF_FASTOPEN;
 			}
 #endif
 		}
 		/* Window scaling. */
 		if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
 			to.to_wscale = tp->request_r_scale;
 			to.to_flags |= TOF_SCALE;
 		}
 		/* Timestamps. */
 		if ((tp->t_flags & TF_RCVD_TSTMP) ||
 		    ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
 			to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
 			to.to_tsecr = tp->ts_recent;
 			to.to_flags |= TOF_TS;
 			/* Set receive buffer autosizing timestamp. */
 			if (tp->rfbuf_ts == 0 &&
 			    (so->so_rcv.sb_flags & SB_AUTOSIZE))
 				tp->rfbuf_ts = tcp_ts_getticks();
 		}
 		/* Selective ACK's. */
 		if (tp->t_flags & TF_SACK_PERMIT) {
 			if (flags & TH_SYN)
 				to.to_flags |= TOF_SACKPERM;
 			else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
 			    (tp->t_flags & TF_SACK_PERMIT) &&
 			    tp->rcv_numsacks > 0) {
 				to.to_flags |= TOF_SACK;
 				to.to_nsacks = tp->rcv_numsacks;
 				to.to_sacks = (u_char *)tp->sackblks;
 			}
 		}
 #ifdef TCP_SIGNATURE
 		/* TCP-MD5 (RFC2385). */
 		if (tp->t_flags & TF_SIGNATURE)
 			to.to_flags |= TOF_SIGNATURE;
 #endif /* TCP_SIGNATURE */
 
 		/* Processing the options. */
 		hdrlen += optlen = tcp_addoptions(&to, opt);
 	}
 
 #ifdef INET6
 	if (isipv6)
 		ipoptlen = ip6_optlen(tp->t_inpcb);
 	else
 #endif
 	if (tp->t_inpcb->inp_options)
 		ipoptlen = tp->t_inpcb->inp_options->m_len -
 				offsetof(struct ipoption, ipopt_list);
 	else
 		ipoptlen = 0;
 #ifdef IPSEC
 	ipoptlen += ipsec_optlen;
 #endif
 
 	/*
 	 * Adjust data length if insertion of options will
-	 * bump the packet length beyond the t_maxopd length.
+	 * bump the packet length beyond the t_maxseg length.
 	 * Clear the FIN bit because we cut off the tail of
 	 * the segment.
 	 */
-	if (len + optlen + ipoptlen > tp->t_maxopd) {
+	if (len + optlen + ipoptlen > tp->t_maxseg) {
 		flags &= ~TH_FIN;
 
 		if (tso) {
 			u_int if_hw_tsomax;
 			u_int if_hw_tsomaxsegcount;
 			u_int if_hw_tsomaxsegsize;
 			struct mbuf *mb;
 			u_int moff;
 			int max_len;
 
 			/* extract TSO information */
 			if_hw_tsomax = tp->t_tsomax;
 			if_hw_tsomaxsegcount = tp->t_tsomaxsegcount;
 			if_hw_tsomaxsegsize = tp->t_tsomaxsegsize;
 
 			/*
 			 * Limit a TSO burst to prevent it from
 			 * overflowing or exceeding the maximum length
 			 * allowed by the network interface:
 			 */
 			KASSERT(ipoptlen == 0,
 			    ("%s: TSO can't do IP options", __func__));
 
 			/*
 			 * Check if we should limit by maximum payload
 			 * length:
 			 */
 			if (if_hw_tsomax != 0) {
 				/* compute maximum TSO length */
 				max_len = (if_hw_tsomax - hdrlen -
 				    max_linkhdr);
 				if (max_len <= 0) {
 					len = 0;
 				} else if (len > max_len) {
 					sendalot = 1;
 					len = max_len;
 				}
 			}
 
 			/*
 			 * Check if we should limit by maximum segment
 			 * size and count:
 			 */
 			if (if_hw_tsomaxsegcount != 0 &&
 			    if_hw_tsomaxsegsize != 0) {
 				/*
 				 * Subtract one segment for the LINK
 				 * and TCP/IP headers mbuf that will
 				 * be prepended to this mbuf chain
 				 * after the code in this section
 				 * limits the number of mbufs in the
 				 * chain to if_hw_tsomaxsegcount.
 				 */
 				if_hw_tsomaxsegcount -= 1;
 				max_len = 0;
 				mb = sbsndmbuf(&so->so_snd, off, &moff);
 
 				while (mb != NULL && max_len < len) {
 					u_int mlen;
 					u_int frags;
 
 					/*
 					 * Get length of mbuf fragment
 					 * and how many hardware frags,
 					 * rounded up, it would use:
 					 */
 					mlen = (mb->m_len - moff);
 					frags = howmany(mlen,
 					    if_hw_tsomaxsegsize);
 
 					/* Handle special case: Zero Length Mbuf */
 					if (frags == 0)
 						frags = 1;
 
 					/*
 					 * Check if the fragment limit
 					 * will be reached or exceeded:
 					 */
 					if (frags >= if_hw_tsomaxsegcount) {
 						max_len += min(mlen,
 						    if_hw_tsomaxsegcount *
 						    if_hw_tsomaxsegsize);
 						break;
 					}
 					max_len += mlen;
 					if_hw_tsomaxsegcount -= frags;
 					moff = 0;
 					mb = mb->m_next;
 				}
 				if (max_len <= 0) {
 					len = 0;
 				} else if (len > max_len) {
 					sendalot = 1;
 					len = max_len;
 				}
 			}
 
 			/*
 			 * Prevent the last segment from being
 			 * fractional unless the send sockbuf can be
 			 * emptied:
 			 */
-			max_len = (tp->t_maxopd - optlen);
+			max_len = (tp->t_maxseg - optlen);
 			if ((off + len) < sbavail(&so->so_snd)) {
 				moff = len % max_len;
 				if (moff != 0) {
 					len -= moff;
 					sendalot = 1;
 				}
 			}
 
 			/*
 			 * In case there are too many small fragments
 			 * don't use TSO:
 			 */
 			if (len <= max_len) {
 				len = max_len;
 				sendalot = 1;
 				tso = 0;
 			}
 
 			/*
 			 * Send the FIN in a separate segment
 			 * after the bulk sending is done.
 			 * We don't trust the TSO implementations
 			 * to clear the FIN flag on all but the
 			 * last segment.
 			 */
 			if (tp->t_flags & TF_NEEDFIN)
 				sendalot = 1;
 
 		} else {
-			len = tp->t_maxopd - optlen - ipoptlen;
+			len = tp->t_maxseg - optlen - ipoptlen;
 			sendalot = 1;
 		}
 	} else
 		tso = 0;
 
 	KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET,
 	    ("%s: len > IP_MAXPACKET", __func__));
 
 /*#ifdef DIAGNOSTIC*/
 #ifdef INET6
 	if (max_linkhdr + hdrlen > MCLBYTES)
 #else
 	if (max_linkhdr + hdrlen > MHLEN)
 #endif
 		panic("tcphdr too big");
 /*#endif*/
 
 	/*
 	 * This KASSERT is here to catch edge cases at a well defined place.
 	 * Before, those had triggered (random) panic conditions further down.
 	 */
 	KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
 
 	/*
 	 * Grab a header mbuf, attaching a copy of data to
 	 * be transmitted, and initialize the header from
 	 * the template for sends on this connection.
 	 */
 	if (len) {
 		struct mbuf *mb;
 		u_int moff;
 
 		if ((tp->t_flags & TF_FORCEDATA) && len == 1)
 			TCPSTAT_INC(tcps_sndprobe);
 		else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
 			tp->t_sndrexmitpack++;
 			TCPSTAT_INC(tcps_sndrexmitpack);
 			TCPSTAT_ADD(tcps_sndrexmitbyte, len);
 		} else {
 			TCPSTAT_INC(tcps_sndpack);
 			TCPSTAT_ADD(tcps_sndbyte, len);
 		}
 #ifdef INET6
 		if (MHLEN < hdrlen + max_linkhdr)
 			m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
 		else
 #endif
 			m = m_gethdr(M_NOWAIT, MT_DATA);
 
 		if (m == NULL) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = ENOBUFS;
 			sack_rxmit = 0;
 			goto out;
 		}
 
 		m->m_data += max_linkhdr;
 		m->m_len = hdrlen;
 
 		/*
 		 * Start the m_copy functions from the closest mbuf
 		 * to the offset in the socket buffer chain.
 		 */
 		mb = sbsndptr(&so->so_snd, off, len, &moff);
 
 		if (len <= MHLEN - hdrlen - max_linkhdr) {
 			m_copydata(mb, moff, (int)len,
 			    mtod(m, caddr_t) + hdrlen);
 			m->m_len += len;
 		} else {
 			m->m_next = m_copy(mb, moff, (int)len);
 			if (m->m_next == NULL) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				(void) m_free(m);
 				error = ENOBUFS;
 				sack_rxmit = 0;
 				goto out;
 			}
 		}
 
 		/*
 		 * If we're sending everything we've got, set PUSH.
 		 * (This will keep happy those implementations which only
 		 * give data to the user when a buffer fills or
 		 * a PUSH comes in.)
 		 */
 		if ((off + len == sbused(&so->so_snd)) && !(flags & TH_SYN))
 			flags |= TH_PUSH;
 		SOCKBUF_UNLOCK(&so->so_snd);
 	} else {
 		SOCKBUF_UNLOCK(&so->so_snd);
 		if (tp->t_flags & TF_ACKNOW)
 			TCPSTAT_INC(tcps_sndacks);
 		else if (flags & (TH_SYN|TH_FIN|TH_RST))
 			TCPSTAT_INC(tcps_sndctrl);
 		else if (SEQ_GT(tp->snd_up, tp->snd_una))
 			TCPSTAT_INC(tcps_sndurg);
 		else
 			TCPSTAT_INC(tcps_sndwinup);
 
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			error = ENOBUFS;
 			sack_rxmit = 0;
 			goto out;
 		}
 #ifdef INET6
 		if (isipv6 && (MHLEN < hdrlen + max_linkhdr) &&
 		    MHLEN >= hdrlen) {
 			M_ALIGN(m, hdrlen);
 		} else
 #endif
 		m->m_data += max_linkhdr;
 		m->m_len = hdrlen;
 	}
 	SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
 	m->m_pkthdr.rcvif = (struct ifnet *)0;
 #ifdef MAC
 	mac_inpcb_create_mbuf(tp->t_inpcb, m);
 #endif
 #ifdef INET6
 	if (isipv6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		th = (struct tcphdr *)(ip6 + 1);
 		tcpip_fillheaders(tp->t_inpcb, ip6, th);
 	} else
 #endif /* INET6 */
 	{
 		ip = mtod(m, struct ip *);
 		ipov = (struct ipovly *)ip;
 		th = (struct tcphdr *)(ip + 1);
 		tcpip_fillheaders(tp->t_inpcb, ip, th);
 	}
 
 	/*
 	 * Fill in fields, remembering maximum advertised
 	 * window for use in delaying messages about window sizes.
 	 * If resending a FIN, be sure not to use a new sequence number.
 	 */
 	if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
 	    tp->snd_nxt == tp->snd_max)
 		tp->snd_nxt--;
 	/*
 	 * If we are starting a connection, send ECN setup
 	 * SYN packet. If we are on a retransmit, we may
 	 * resend those bits a number of times as per
 	 * RFC 3168.
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
 		if (tp->t_rxtshift >= 1) {
 			if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
 				flags |= TH_ECE|TH_CWR;
 		} else
 			flags |= TH_ECE|TH_CWR;
 	}
 	
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    (tp->t_flags & TF_ECN_PERMIT)) {
 		/*
 		 * If the peer has ECN, mark data packets with
 		 * ECN capable transmission (ECT).
 		 * Ignore pure ack packets, retransmissions and window probes.
 		 */
 		if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
 		    !((tp->t_flags & TF_FORCEDATA) && len == 1)) {
 #ifdef INET6
 			if (isipv6)
 				ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
 			else
 #endif
 				ip->ip_tos |= IPTOS_ECN_ECT0;
 			TCPSTAT_INC(tcps_ecn_ect0);
 		}
 		
 		/*
 		 * Reply with proper ECN notifications.
 		 */
 		if (tp->t_flags & TF_ECN_SND_CWR) {
 			flags |= TH_CWR;
 			tp->t_flags &= ~TF_ECN_SND_CWR;
 		} 
 		if (tp->t_flags & TF_ECN_SND_ECE)
 			flags |= TH_ECE;
 	}
 	
 	/*
 	 * If we are doing retransmissions, then snd_nxt will
 	 * not reflect the first unsent octet.  For ACK only
 	 * packets, we do not want the sequence number of the
 	 * retransmitted packet, we want the sequence number
 	 * of the next unsent octet.  So, if there is no data
 	 * (and no SYN or FIN), use snd_max instead of snd_nxt
 	 * when filling in ti_seq.  But if we are in persist
 	 * state, snd_max might reflect one byte beyond the
 	 * right edge of the window, so use snd_nxt in that
 	 * case, since we know we aren't doing a retransmission.
 	 * (retransmit and persist are mutually exclusive...)
 	 */
 	if (sack_rxmit == 0) {
 		if (len || (flags & (TH_SYN|TH_FIN)) ||
 		    tcp_timer_active(tp, TT_PERSIST))
 			th->th_seq = htonl(tp->snd_nxt);
 		else
 			th->th_seq = htonl(tp->snd_max);
 	} else {
 		th->th_seq = htonl(p->rxmit);
 		p->rxmit += len;
 		tp->sackhint.sack_bytes_rexmit += len;
 	}
 	th->th_ack = htonl(tp->rcv_nxt);
 	if (optlen) {
 		bcopy(opt, th + 1, optlen);
 		th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
 	}
 	th->th_flags = flags;
 	/*
 	 * Calculate receive window.  Don't shrink window,
 	 * but avoid silly window syndrome.
 	 */
 	if (recwin < (long)(so->so_rcv.sb_hiwat / 4) &&
 	    recwin < (long)tp->t_maxseg)
 		recwin = 0;
 	if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
 	    recwin < (long)(tp->rcv_adv - tp->rcv_nxt))
 		recwin = (long)(tp->rcv_adv - tp->rcv_nxt);
 	if (recwin > (long)TCP_MAXWIN << tp->rcv_scale)
 		recwin = (long)TCP_MAXWIN << tp->rcv_scale;
 
 	/*
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 	 * or <SYN,ACK>) segment itself is never scaled.  The <SYN,ACK>
 	 * case is handled in syncache.
 	 */
 	if (flags & TH_SYN)
 		th->th_win = htons((u_short)
 				(min(sbspace(&so->so_rcv), TCP_MAXWIN)));
 	else
 		th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
 
 	/*
 	 * Adjust the RXWIN0SENT flag - indicate that we have advertised
 	 * a 0 window.  This may cause the remote transmitter to stall.  This
 	 * flag tells soreceive() to disable delayed acknowledgements when
 	 * draining the buffer.  This can occur if the receiver is attempting
 	 * to read more data than can be buffered prior to transmitting on
 	 * the connection.
 	 */
 	if (th->th_win == 0) {
 		tp->t_sndzerowin++;
 		tp->t_flags |= TF_RXWIN0SENT;
 	} else
 		tp->t_flags &= ~TF_RXWIN0SENT;
 	if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
 		th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
 		th->th_flags |= TH_URG;
 	} else
 		/*
 		 * If no urgent pointer to send, then we pull
 		 * the urgent pointer to the left edge of the send window
 		 * so that it doesn't drift into the send window on sequence
 		 * number wraparound.
 		 */
 		tp->snd_up = tp->snd_una;		/* drag it along */
 
 #ifdef TCP_SIGNATURE
 	if (tp->t_flags & TF_SIGNATURE) {
 		int sigoff = to.to_signature - opt;
 		tcp_signature_compute(m, 0, len, optlen,
 		    (u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND);
 	}
 #endif
 
 	/*
 	 * Put TCP length in extended header, and then
 	 * checksum extended header and data.
 	 */
 	m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 #ifdef INET6
 	if (isipv6) {
 		/*
 		 * ip6_plen is not need to be filled now, and will be filled
 		 * in ip6_output.
 		 */
 		m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 		th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) +
 		    optlen + len, IPPROTO_TCP, 0);
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen));
 
 		/* IP version must be set here for ipv4/ipv6 checking later */
 		KASSERT(ip->ip_v == IPVERSION,
 		    ("%s: IP version incorrect: %d", __func__, ip->ip_v));
 	}
 #endif
 
 	/*
 	 * Enable TSO and specify the size of the segments.
 	 * The TCP pseudo header checksum is always provided.
 	 */
 	if (tso) {
-		KASSERT(len > tp->t_maxopd - optlen,
+		KASSERT(len > tp->t_maxseg - optlen,
 		    ("%s: len <= tso_segsz", __func__));
 		m->m_pkthdr.csum_flags |= CSUM_TSO;
-		m->m_pkthdr.tso_segsz = tp->t_maxopd - optlen;
+		m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen;
 	}
 
 #ifdef IPSEC
 	KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL),
 	    ("%s: mbuf chain shorter than expected: %ld + %u + %u - %u != %u",
 	    __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL)));
 #else
 	KASSERT(len + hdrlen + ipoptlen == m_length(m, NULL),
 	    ("%s: mbuf chain shorter than expected: %ld + %u + %u != %u",
 	    __func__, len, hdrlen, ipoptlen, m_length(m, NULL)));
 #endif
 
 	/* Run HHOOK_TCP_ESTABLISHED_OUT helper hooks. */
 	hhook_run_tcp_est_out(tp, th, &to, len, tso);
 
 #ifdef TCPDEBUG
 	/*
 	 * Trace.
 	 */
 	if (so->so_options & SO_DEBUG) {
 		u_short save = 0;
 #ifdef INET6
 		if (!isipv6)
 #endif
 		{
 			save = ipov->ih_len;
 			ipov->ih_len = htons(m->m_pkthdr.len /* - hdrlen + (th->th_off << 2) */);
 		}
 		tcp_trace(TA_OUTPUT, tp->t_state, tp, mtod(m, void *), th, 0);
 #ifdef INET6
 		if (!isipv6)
 #endif
 		ipov->ih_len = save;
 	}
 #endif /* TCPDEBUG */
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 
 	/*
 	 * Fill in IP length and desired time to live and
 	 * send to IP level.  There should be a better way
 	 * to handle ttl and tos; we could keep them in
 	 * the template, but need a way to checksum without them.
 	 */
 	/*
 	 * m->m_pkthdr.len should have been set before checksum calculation,
 	 * because in6_cksum() need it.
 	 */
 #ifdef INET6
 	if (isipv6) {
 		struct route_in6 ro;
 
 		bzero(&ro, sizeof(ro));
 		/*
 		 * we separately set hoplimit for every segment, since the
 		 * user might want to change the value via setsockopt.
 		 * Also, desired default hop limit might be changed via
 		 * Neighbor Discovery.
 		 */
 		ip6->ip6_hlim = in6_selecthlim(tp->t_inpcb, NULL);
 
 		/*
 		 * Set the packet size here for the benefit of DTrace probes.
 		 * ip6_output() will set it properly; it's supposed to include
 		 * the option header lengths as well.
 		 */
 		ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
 
-		if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
+		if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss)
 			tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 		else
 			tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 
 		if (tp->t_state == TCPS_SYN_SENT)
 			TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
 
 		TCP_PROBE5(send, NULL, tp, ip6, tp, th);
 
 #ifdef TCPPCAP
 		/* Save packet, if requested. */
 		tcp_pcap_add(th, m, &(tp->t_outpkts));
 #endif
 
 		/* TODO: IPv6 IP6TOS_ECT bit on */
 		error = ip6_output(m, tp->t_inpcb->in6p_outputopts, &ro,
 		    ((so->so_options & SO_DONTROUTE) ?  IP_ROUTETOIF : 0),
 		    NULL, NULL, tp->t_inpcb);
 
 		if (error == EMSGSIZE && ro.ro_rt != NULL)
 			mtu = ro.ro_rt->rt_mtu;
 		RO_RTFREE(&ro);
 	}
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
     {
 	struct route ro;
 
 	bzero(&ro, sizeof(ro));
 	ip->ip_len = htons(m->m_pkthdr.len);
 #ifdef INET6
 	if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO)
 		ip->ip_ttl = in6_selecthlim(tp->t_inpcb, NULL);
 #endif /* INET6 */
 	/*
 	 * If we do path MTU discovery, then we set DF on every packet.
 	 * This might not be the best thing to do according to RFC3390
 	 * Section 2. However the tcp hostcache migitates the problem
 	 * so it affects only the first tcp connection with a host.
 	 *
 	 * NB: Don't set DF on small MTU/MSS to have a safe fallback.
 	 */
-	if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) {
+	if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
 		ip->ip_off |= htons(IP_DF);
 		tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 	} else {
 		tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 	}
 
 	if (tp->t_state == TCPS_SYN_SENT)
 		TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
 
 	TCP_PROBE5(send, NULL, tp, ip, tp, th);
 
 #ifdef TCPPCAP
 	/* Save packet, if requested. */
 	tcp_pcap_add(th, m, &(tp->t_outpkts));
 #endif
 
 	error = ip_output(m, tp->t_inpcb->inp_options, &ro,
 	    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
 	    tp->t_inpcb);
 
 	if (error == EMSGSIZE && ro.ro_rt != NULL)
 		mtu = ro.ro_rt->rt_mtu;
 	RO_RTFREE(&ro);
     }
 #endif /* INET */
 
 out:
 	/*
 	 * In transmit state, time the transmission and arrange for
 	 * the retransmit.  In persist state, just set snd_max.
 	 */
 	if ((tp->t_flags & TF_FORCEDATA) == 0 || 
 	    !tcp_timer_active(tp, TT_PERSIST)) {
 		tcp_seq startseq = tp->snd_nxt;
 
 		/*
 		 * Advance snd_nxt over sequence space of this segment.
 		 */
 		if (flags & (TH_SYN|TH_FIN)) {
 			if (flags & TH_SYN)
 				tp->snd_nxt++;
 			if (flags & TH_FIN) {
 				tp->snd_nxt++;
 				tp->t_flags |= TF_SENTFIN;
 			}
 		}
 		if (sack_rxmit)
 			goto timer;
 		tp->snd_nxt += len;
 		if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
 			tp->snd_max = tp->snd_nxt;
 			/*
 			 * Time this transmission if not a retransmission and
 			 * not currently timing anything.
 			 */
 			if (tp->t_rtttime == 0) {
 				tp->t_rtttime = ticks;
 				tp->t_rtseq = startseq;
 				TCPSTAT_INC(tcps_segstimed);
 			}
 		}
 
 		/*
 		 * Set retransmit timer if not currently set,
 		 * and not doing a pure ack or a keep-alive probe.
 		 * Initial value for retransmit timer is smoothed
 		 * round-trip time + 2 * round-trip time variance.
 		 * Initialize shift counter which is used for backoff
 		 * of retransmit time.
 		 */
 timer:
 		if (!tcp_timer_active(tp, TT_REXMT) &&
 		    ((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
 		     (tp->snd_nxt != tp->snd_una))) {
 			if (tcp_timer_active(tp, TT_PERSIST)) {
 				tcp_timer_activate(tp, TT_PERSIST, 0);
 				tp->t_rxtshift = 0;
 			}
 			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 		} else if (len == 0 && sbavail(&so->so_snd) &&
 		    !tcp_timer_active(tp, TT_REXMT) &&
 		    !tcp_timer_active(tp, TT_PERSIST)) {
 			/*
 			 * Avoid a situation where we do not set persist timer
 			 * after a zero window condition. For example:
 			 * 1) A -> B: packet with enough data to fill the window
 			 * 2) B -> A: ACK for #1 + new data (0 window
 			 *    advertisement)
 			 * 3) A -> B: ACK for #2, 0 len packet
 			 *
 			 * In this case, A will not activate the persist timer,
 			 * because it chose to send a packet. Unless tcp_output
 			 * is called for some other reason (delayed ack timer,
 			 * another input packet from B, socket syscall), A will
 			 * not send zero window probes.
 			 *
 			 * So, if you send a 0-length packet, but there is data
 			 * in the socket buffer, and neither the rexmt or
 			 * persist timer is already set, then activate the
 			 * persist timer.
 			 */
 			tp->t_rxtshift = 0;
 			tcp_setpersist(tp);
 		}
 	} else {
 		/*
 		 * Persist case, update snd_max but since we are in
 		 * persist mode (no window) we do not update snd_nxt.
 		 */
 		int xlen = len;
 		if (flags & TH_SYN)
 			++xlen;
 		if (flags & TH_FIN) {
 			++xlen;
 			tp->t_flags |= TF_SENTFIN;
 		}
 		if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
 			tp->snd_max = tp->snd_nxt + len;
 	}
 
 	if (error) {
 
 		/*
 		 * We know that the packet was lost, so back out the
 		 * sequence number advance, if any.
 		 *
 		 * If the error is EPERM the packet got blocked by the
 		 * local firewall.  Normally we should terminate the
 		 * connection but the blocking may have been spurious
 		 * due to a firewall reconfiguration cycle.  So we treat
 		 * it like a packet loss and let the retransmit timer and
 		 * timeouts do their work over time.
 		 * XXX: It is a POLA question whether calling tcp_drop right
 		 * away would be the really correct behavior instead.
 		 */
 		if (((tp->t_flags & TF_FORCEDATA) == 0 ||
 		    !tcp_timer_active(tp, TT_PERSIST)) &&
 		    ((flags & TH_SYN) == 0) &&
 		    (error != EPERM)) {
 			if (sack_rxmit) {
 				p->rxmit -= len;
 				tp->sackhint.sack_bytes_rexmit -= len;
 				KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
 				    ("sackhint bytes rtx >= 0"));
 			} else
 				tp->snd_nxt -= len;
 		}
 		SOCKBUF_UNLOCK_ASSERT(&so->so_snd);	/* Check gotos. */
 		switch (error) {
 		case EPERM:
 			tp->t_softerror = error;
 			return (error);
 		case ENOBUFS:
 	                if (!tcp_timer_active(tp, TT_REXMT) &&
 			    !tcp_timer_active(tp, TT_PERSIST))
 	                        tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 			tp->snd_cwnd = tp->t_maxseg;
 			return (0);
 		case EMSGSIZE:
 			/*
 			 * For some reason the interface we used initially
 			 * to send segments changed to another or lowered
 			 * its MTU.
 			 * If TSO was active we either got an interface
 			 * without TSO capabilits or TSO was turned off.
 			 * If we obtained mtu from ip_output() then update
 			 * it and try again.
 			 */
 			if (tso)
 				tp->t_flags &= ~TF_TSO;
 			if (mtu != 0) {
 				tcp_mss_update(tp, -1, mtu, NULL, NULL);
 				goto again;
 			}
 			return (error);
 		case EHOSTDOWN:
 		case EHOSTUNREACH:
 		case ENETDOWN:
 		case ENETUNREACH:
 			if (TCPS_HAVERCVDSYN(tp->t_state)) {
 				tp->t_softerror = error;
 				return (0);
 			}
 			/* FALLTHROUGH */
 		default:
 			return (error);
 		}
 	}
 	TCPSTAT_INC(tcps_sndtotal);
 
 	/*
 	 * Data sent (as far as we can tell).
 	 * If this advertises a larger window than any other segment,
 	 * then remember the size of the advertised window.
 	 * Any pending ACK has now been sent.
 	 */
 	if (recwin >= 0 && SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
 		tp->rcv_adv = tp->rcv_nxt + recwin;
 	tp->last_ack_sent = tp->rcv_nxt;
 	tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
 	if (tcp_timer_active(tp, TT_DELACK))
 		tcp_timer_activate(tp, TT_DELACK, 0);
 #if 0
 	/*
 	 * This completely breaks TCP if newreno is turned on.  What happens
 	 * is that if delayed-acks are turned on on the receiver, this code
 	 * on the transmitter effectively destroys the TCP window, forcing
 	 * it to four packets (1.5Kx4 = 6K window).
 	 */
 	if (sendalot && --maxburst)
 		goto again;
 #endif
 	if (sendalot)
 		goto again;
 	return (0);
 }
 
 /*
  * Start or restart the persist timer for tp.
  *
  * A base interval is derived from t_srtt and t_rttvar, multiplied by the
  * tcp_backoff[] entry selected by t_rxtshift, and handed to TCPT_RANGESET
  * together with the TCPTV_PERSMIN / TCPTV_PERSMAX bounds; the result is the
  * timeout used to activate TT_PERSIST.  Afterwards t_rxtshift is bumped by
  * one unless it has already reached TCP_MAXRXTSHIFT.  TF_PREVVALID is
  * cleared on entry.
  *
  * Must not be called while the retransmit timer is active: that case panics.
  */
 void
 tcp_setpersist(struct tcpcb *tp)
 {
 	int interval;
 	int rtt_base;
 
 	/* RTT-derived base interval; reads only t_srtt and t_rttvar. */
 	rtt_base = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
 
 	tp->t_flags &= ~TF_PREVVALID;
 	if (tcp_timer_active(tp, TT_REXMT))
 		panic("tcp_setpersist: retransmit pending");
 
 	/* Scale by the current backoff step and arm the persist timer. */
 	TCPT_RANGESET(interval, rtt_base * tcp_backoff[tp->t_rxtshift],
 	    TCPTV_PERSMIN, TCPTV_PERSMAX);
 	tcp_timer_activate(tp, TT_PERSIST, interval);
 
 	/* Move to the next backoff step, stopping at the table's last index. */
 	if (tp->t_rxtshift >= TCP_MAXRXTSHIFT)
 		return;
 	tp->t_rxtshift++;
 }
 
 /*
  * Insert TCP options according to the supplied parameters to the place
  * optp in a consistent way.  Can handle unaligned destinations.
  *
  * The order of the option processing is crucial for optimal packing and
  * alignment for the scarce option space.
  *
  * The optimal order for a SYN/SYN-ACK segment is:
  *   MSS (4) + NOP (1) + Window scale (3) + SACK permitted (2) +
  *   Timestamp (10) + Signature (18) = 38 bytes out of a maximum of 40.
  *
  * The SACK options should be last.  SACK blocks consume 8*n+2 bytes.
  * So a full size SACK blocks option is 34 bytes (with 4 SACK blocks).
  * At minimum we need 10 bytes (to generate 1 SACK block).  If both
  * TCP Timestamps (12 bytes, i.e. the 10 byte option plus two alignment
  * NOPs) and TCP Signatures (18 bytes) are present, we only have 10 bytes
  * for SACK options (40 - (12 + 18)).
  *
  * Parameters:
  *   to    Options to emit; each bit set in to->to_flags selects one option.
  *         This function modifies *to: to_mss, to_tsval and to_tsecr are
  *         overwritten with their htons()/htonl() converted values, and
  *         to_signature is set to point at the signature bytes inside the
  *         output buffer.
  *   optp  Destination for the encoded options; bytes are written
  *         sequentially starting here.  Presumably the caller provides at
  *         least TCP_MAXOLEN bytes -- the function itself never checks the
  *         size of the buffer, only its own running length.
  *
  * Returns the number of bytes written, including NOP/EOL/PAD bytes.  The
  * trailing padding loop makes the result a multiple of 4.
  */
 int
 tcp_addoptions(struct tcpopt *to, u_char *optp)
 {
 	u_int mask, optlen = 0;
 
 	/*
 	 * Walk the flag bits from the lowest upwards; the bit order is what
 	 * determines the order of the options on the wire.
 	 *
 	 * Inside the switch, "continue" applies to this for loop: it abandons
 	 * the current option (not enough room) and moves on to the next bit.
 	 *
 	 * NOTE(review): every case writes its alignment NOPs before checking
 	 * whether the option itself fits, so a skipped option still leaves its
 	 * NOPs in the buffer and counted in optlen.  optlen is unsigned, so if
 	 * padding ever pushed it past TCP_MAXOLEN the "TCP_MAXOLEN - optlen"
 	 * comparisons would presumably wrap instead of going negative --
 	 * confirm this cannot happen with the option combinations in use.
 	 */
 	for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
 		if ((to->to_flags & mask) != mask)
 			continue;
 		/* Option space is exactly full; nothing more can be added. */
 		if (optlen == TCP_MAXOLEN)
 			break;
 		switch (to->to_flags & mask) {
 		case TOF_MSS:
 			/* Start the MSS option on a 4 byte boundary. */
 			while (optlen % 4) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			if (TCP_MAXOLEN - optlen < TCPOLEN_MAXSEG)
 				continue;
 			optlen += TCPOLEN_MAXSEG;
 			*optp++ = TCPOPT_MAXSEG;
 			*optp++ = TCPOLEN_MAXSEG;
 			/* Converted in place; bcopy copes with unaligned optp. */
 			to->to_mss = htons(to->to_mss);
 			bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss));
 			optp += sizeof(to->to_mss);
 			break;
 		case TOF_SCALE:
 			/*
 			 * Pad until optlen is odd (and non-zero), so that at
 			 * least one NOP precedes the window scale option.
 			 */
 			while (!optlen || optlen % 2 != 1) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			if (TCP_MAXOLEN - optlen < TCPOLEN_WINDOW)
 				continue;
 			optlen += TCPOLEN_WINDOW;
 			*optp++ = TCPOPT_WINDOW;
 			*optp++ = TCPOLEN_WINDOW;
 			*optp++ = to->to_wscale;
 			break;
 		case TOF_SACKPERM:
 			/* Start SACK-permitted on a 2 byte boundary. */
 			while (optlen % 2) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			if (TCP_MAXOLEN - optlen < TCPOLEN_SACK_PERMITTED)
 				continue;
 			optlen += TCPOLEN_SACK_PERMITTED;
 			*optp++ = TCPOPT_SACK_PERMITTED;
 			*optp++ = TCPOLEN_SACK_PERMITTED;
 			break;
 		case TOF_TS:
 			/*
 			 * Pad until optlen % 4 == 2 (and optlen is non-zero):
 			 * after the kind and length bytes the two timestamp
 			 * values then begin on a 4 byte boundary.
 			 */
 			while (!optlen || optlen % 4 != 2) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			if (TCP_MAXOLEN - optlen < TCPOLEN_TIMESTAMP)
 				continue;
 			optlen += TCPOLEN_TIMESTAMP;
 			*optp++ = TCPOPT_TIMESTAMP;
 			*optp++ = TCPOLEN_TIMESTAMP;
 			/* Both values are converted in place inside *to. */
 			to->to_tsval = htonl(to->to_tsval);
 			to->to_tsecr = htonl(to->to_tsecr);
 			bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval));
 			optp += sizeof(to->to_tsval);
 			bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
 			optp += sizeof(to->to_tsecr);
 			break;
 		case TOF_SIGNATURE:
 			{
 			/* Payload size: option length minus kind and length. */
 			int siglen = TCPOLEN_SIGNATURE - 2;
 
 			/* Same alignment rule as the timestamp option. */
 			while (!optlen || optlen % 4 != 2) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE)
 				continue;
 			optlen += TCPOLEN_SIGNATURE;
 			*optp++ = TCPOPT_SIGNATURE;
 			*optp++ = TCPOLEN_SIGNATURE;
 			/*
 			 * Only a zero-filled placeholder is written here;
 			 * to_signature remembers where it lives so the caller
 			 * can locate those bytes afterwards.
 			 */
 			to->to_signature = optp;
 			while (siglen--)
 				 *optp++ = 0;
 			break;
 			}
 		case TOF_SACK:
 			{
 			int sackblks = 0;
 			struct sackblk *sack = (struct sackblk *)to->to_sacks;
 			tcp_seq sack_seq;
 
 			/* Same alignment rule as the timestamp option. */
 			while (!optlen || optlen % 4 != 2) {
 				optlen += TCPOLEN_NOP;
 				*optp++ = TCPOPT_NOP;
 			}
 			/* Need room for the header plus at least one block. */
 			if (TCP_MAXOLEN - optlen < TCPOLEN_SACKHDR + TCPOLEN_SACK)
 				continue;
 			optlen += TCPOLEN_SACKHDR;
 			*optp++ = TCPOPT_SACK;
 			/* Emit as many blocks as requested and as still fit. */
 			sackblks = min(to->to_nsacks,
 					(TCP_MAXOLEN - optlen) / TCPOLEN_SACK);
 			*optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK;
 			/* Each block is its start then its end, via htonl(). */
 			while (sackblks--) {
 				sack_seq = htonl(sack->start);
 				bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
 				optp += sizeof(sack_seq);
 				sack_seq = htonl(sack->end);
 				bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
 				optp += sizeof(sack_seq);
 				optlen += TCPOLEN_SACK;
 				sack++;
 			}
 			TCPSTAT_INC(tcps_sack_send_blocks);
 			break;
 			}
 #ifdef TCP_RFC7413
 		case TOF_FASTOPEN:
 			{
 			int total_len;
 
 			/* XXX is there any point to aligning this option? */
 			/* Kind and length bytes plus to_tfo_len cookie bytes. */
 			total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
 			if (TCP_MAXOLEN - optlen < total_len)
 				continue;
 			*optp++ = TCPOPT_FAST_OPEN;
 			*optp++ = total_len;
 			if (to->to_tfo_len > 0) {
 				bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
 				optp += to->to_tfo_len;
 			}
 			optlen += total_len;
 			break;
 			}
 #endif
 		default:
 			/* A set flag bit with no matching case above. */
 			panic("%s: unknown TCP option type", __func__);
 			break;
 		}
 	}
 
 	/* Terminate and pad TCP options to a 4 byte boundary. */
 	if (optlen % 4) {
 		optlen += TCPOLEN_EOL;
 		*optp++ = TCPOPT_EOL;
 	}
 	/*
 	 * According to RFC 793 (STD0007):
 	 *   "The content of the header beyond the End-of-Option option
 	 *    must be header padding (i.e., zero)."
 	 *   and later: "The padding is composed of zeros."
 	 */
 	while (optlen % 4) {
 		optlen += TCPOLEN_PAD;
 		*optp++ = TCPOPT_PAD;
 	}
 
 	KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__));
 	return (optlen);
 }
Index: projects/release-pkg/sys/netinet/tcp_stacks/fastpath.c
===================================================================
--- projects/release-pkg/sys/netinet/tcp_stacks/fastpath.c	(revision 293335)
+++ projects/release-pkg/sys/netinet/tcp_stacks/fastpath.c	(revision 293336)
@@ -1,2461 +1,2459 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 2007-2008,2010
  *	Swinburne University of Technology, Melbourne, Australia.
  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
  * Copyright (c) 2010 The FreeBSD Foundation
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * Copyright (c) 2015 Netflix Inc.
  * All rights reserved.
  *
  * Portions of this software were developed at the Centre for Advanced Internet
  * Architectures, Swinburne University of Technology, by Lawrence Stewart,
  * James Healy and David Hayes, made possible in part by a grant from the Cisco
  * University Research Program Fund at Community Foundation Silicon Valley.
  *
  * Portions of this software were developed at the Centre for Advanced
  * Internet Architectures, Swinburne University of Technology, Melbourne,
  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Portions of this software were developed by Randall R. Stewart while
  * working for Netflix Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ipfw.h"		/* for ipfw_fwd	*/
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_kdtrace.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
 #include <sys/hhook.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/protosw.h>
 #include <sys/sdt.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/systm.h>
 
 #include <machine/cpu.h>	/* before tcp_seq.h, for tcp_random18() */
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #define TCPSTATES		/* for logging */
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>	/* required for icmp_var.h */
 #include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet6/tcp6_var.h>
 #include <netinet/tcpip.h>
 #include <netinet/tcp_syncache.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/ipsec6.h>
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /*
  * NOTE(review): this has neither "extern" nor an initializer, so in C it is
  * a tentative definition in this file rather than a pure reference to an
  * object defined elsewhere.  Presumably it is meant to name the retransmit
  * threshold owned by the base TCP input code -- confirm, and confirm how
  * the build resolves the symbol.
  */
 const int tcprexmtthresh;
 
 /*
  * Variables declared through VNET_DECLARE() for use in this file.  Each one
  * is paired with a V_ accessor macro that wraps the name in VNET().
  */
 VNET_DECLARE(int, tcp_autorcvbuf_inc);
 #define	V_tcp_autorcvbuf_inc	VNET(tcp_autorcvbuf_inc)
 VNET_DECLARE(int, tcp_autorcvbuf_max);
 #define	V_tcp_autorcvbuf_max	VNET(tcp_autorcvbuf_max)
 VNET_DECLARE(int, tcp_do_rfc3042);
 #define	V_tcp_do_rfc3042	VNET(tcp_do_rfc3042)
 VNET_DECLARE(int, tcp_do_autorcvbuf);
 #define	V_tcp_do_autorcvbuf	VNET(tcp_do_autorcvbuf)
 VNET_DECLARE(int, tcp_insecure_rst);
 #define	V_tcp_insecure_rst	VNET(tcp_insecure_rst)
 VNET_DECLARE(int, tcp_insecure_syn);
 #define	V_tcp_insecure_syn	VNET(tcp_insecure_syn)
 
 /*
  * Forward declarations of two file-local segment handlers with identical
  * parameter lists.  Parameter names are omitted in the prototypes.
  */
 static void	 tcp_do_segment_fastslow(struct mbuf *, struct tcphdr *,
 			struct socket *, struct tcpcb *, int, int, uint8_t,
 			int);
 
 static void	 tcp_do_segment_fastack(struct mbuf *, struct tcphdr *,
 			struct socket *, struct tcpcb *, int, int, uint8_t,
 			int);
 
 /*
  * Indicate whether this ack should be delayed.  We can delay the ack if
  * following conditions are met:
  *	- There is no delayed ack timer in progress.
  *	- Our last ack wasn't a 0-sized window. We never want to delay
  *	  the ack that opens up a 0-sized window.
  *	- LRO wasn't used for this segment. We make sure by checking that the
  *	  segment size is not larger than the MSS.
- *	- Delayed acks are enabled or this is a half-synchronized T/TCP
- *	  connection.
  */
 #define DELAY_ACK(tp, tlen)						\
 	((!tcp_timer_active(tp, TT_DELACK) &&				\
 	    (tp->t_flags & TF_RXWIN0SENT) == 0) &&			\
-	    (tlen <= tp->t_maxopd) &&					\
+	    (tlen <= tp->t_maxseg) &&					\
 	    (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
 
 /*
  * So how is this faster than the normal fast ack?
  * It basically allows us to also stay in the fastpath
  * when a window-update ack also arrives. In testing
  * we saw only 25-30% of connections doing fastpath 
  * due to the fact that along with moving forward
  * in sequence the window was also updated.
  */
 static void
 tcp_do_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	       struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, 
 	       int ti_locked, u_long tiwin)
 {
 	int acked;
 	int winup_only=0;
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
         /*
 	 * The following if statment will be true if
 	 * we are doing the win_up_in_fp <and>
 	 * - We have more new data (SEQ_LT(tp->snd_wl1, th->th_seq)) <or>
 	 * - No more new data, but we have an ack for new data
 	 *   (tp->snd_wl1 == th->th_seq && SEQ_LT(tp->snd_wl2, th->th_ack))
 	 * - No more new data, the same ack point but the window grew
 	 *   (tp->snd_wl1 == th->th_seq && tp->snd_wl2 == th->th_ack && twin > tp->snd_wnd)
 	 */
 	if ((SEQ_LT(tp->snd_wl1, th->th_seq) ||
 	     (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
 					    (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
 		/* keep track of pure window updates */
 		if (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) {
 			winup_only = 1;
 			TCPSTAT_INC(tcps_rcvwinupd);
 		}
 		tp->snd_wnd = tiwin;
 		tp->snd_wl1 = th->th_seq;
 		tp->snd_wl2 = th->th_ack;
 		if (tp->snd_wnd > tp->max_sndwnd)
 			tp->max_sndwnd = tp->snd_wnd;
 	}
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record the timestamp.
 	 * NOTE that the test is modified according to the latest
 	 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to->to_tsval;
 	}
 	/*
 	 * This is a pure ack for outstanding data.
 	 */
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 	TCPSTAT_INC(tcps_predack);
 
 	/*
 	 * "bad retransmit" recovery.
 	 */
 	if (tp->t_rxtshift == 1 &&
 	    tp->t_flags & TF_PREVVALID &&
 	    (int)(ticks - tp->t_badrxtwin) < 0) {
 		cc_cong_signal(tp, th, CC_RTO_ERR);
 	}
 
 	/*
 	 * Recalculate the transmit timer / rtt.
 	 *
 	 * Some boxes send broken timestamp replies
 	 * during the SYN+ACK phase, ignore
 	 * timestamps of 0 or we could calculate a
 	 * huge RTT and blow up the retransmit timer.
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    to->to_tsecr) {
 		u_int t;
 
 		t = tcp_ts_getticks() - to->to_tsecr;
 		if (!tp->t_rttlow || tp->t_rttlow > t)
 			tp->t_rttlow = t;
 		tcp_xmit_timer(tp,
 			       TCP_TS_TO_TICKS(t) + 1);
 	} else if (tp->t_rtttime &&
 		   SEQ_GT(th->th_ack, tp->t_rtseq)) {
 		if (!tp->t_rttlow ||
 		    tp->t_rttlow > ticks - tp->t_rtttime)
 			tp->t_rttlow = ticks - tp->t_rtttime;
 		tcp_xmit_timer(tp,
 			       ticks - tp->t_rtttime);
 	}
 	if (winup_only == 0) {
 		acked = BYTES_THIS_ACK(tp, th);
 
 		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 		hhook_run_tcp_est_in(tp, th, to);
 
 		TCPSTAT_ADD(tcps_rcvackbyte, acked);
 		sbdrop(&so->so_snd, acked);
 		if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
 				
 		/*
 		 * Let the congestion control algorithm update
 		 * congestion control related information. This
 		 * typically means increasing the congestion
 		 * window.
 		 */
 		cc_ack_received(tp, th, CC_ACK);
 
 		tp->snd_una = th->th_ack;
 		/*
 		 * Pull snd_wl2 up to prevent seq wrap relative
 		 * to th_ack.
 		 */
 		tp->snd_wl2 = th->th_ack;
 		tp->t_dupacks = 0;
 		m_freem(m);
 
 		/*
 		 * If all outstanding data are acked, stop
 		 * retransmit timer, otherwise restart timer
 		 * using current (possibly backed-off) value.
 		 * If process is waiting for space,
 		 * wakeup/selwakeup/signal.  If data
 		 * are ready to send, let tcp_output
 		 * decide between more output or persist.
 		 */
 #ifdef TCPDEBUG
 		if (so->so_options & SO_DEBUG)
 			tcp_trace(TA_INPUT, ostate, tp,
 				  (void *)tcp_saveipgen,
 				  &tcp_savetcp, 0);
 #endif
 		if (tp->snd_una == tp->snd_max)
 			tcp_timer_activate(tp, TT_REXMT, 0);
 		else if (!tcp_timer_active(tp, TT_PERSIST))
 			tcp_timer_activate(tp, TT_REXMT,
 					   tp->t_rxtcur);
 	} else {
 		/* 
 		 * Window update only, just free the mbufs and
 		 * send out whatever we can.
 		 */
 		m_freem(m);
 	}
 	sowwakeup(so);
 	if (sbavail(&so->so_snd))
 		(void) tcp_output(tp);
 	KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
 					    __func__, ti_locked));
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 }
 
 /*
  * Fast path for a pure, in-sequence data segment.  Nothing here is
  * really faster than the generic code; the fast-data path has simply
  * been broken out into its own function, just like the fast-ack path.
  *
  * The function records the peer timestamp when appropriate, releases
  * the tcbinfo read lock if the caller holds it, advances rcv_nxt by
  * tlen, optionally grows the receive socket buffer, appends the payload
  * to so->so_rcv (or frees it if the socket can no longer receive) and
  * then either schedules a delayed ACK or sends an ACK immediately.
  *
  *	m		segment; consumed here (queued on so_rcv or freed)
  *	th		TCP header of the segment
  *	so, tp		socket and TCP control block of the connection
  *	to		parsed TCP options of the segment
  *	drop_hdrlen	leading bytes trimmed from m before it is queued
  *	tlen		payload length; added to rcv_nxt and the statistics
  *	ti_locked	TI_RLOCKED if the caller holds the tcbinfo read lock
  *	tiwin		not used by this function
  *
  * The inpcb write lock is asserted to be held near the end of the
  * function and is released before returning.
  */
 static void
 tcp_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		   struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
 		   int ti_locked, u_long tiwin)
 {
 	int newsize = 0;	/* automatic sockbuf scaling */
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 *
 	 * Nothing in this function fills in these scratch copies, so
 	 * zero them; otherwise the tcp_trace() call below would be
 	 * handed uninitialized stack contents.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN] = { 0 };
 	struct tcphdr tcp_savetcp = { 0 };
 	short ostate = 0;
 #endif
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record the timestamp.
 	 * NOTE that the test is modified according to the latest
 	 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to->to_tsval;
 	}
 
 	/*
 	 * This is a pure, in-sequence data packet with
 	 * nothing on the reassembly queue and we have enough
 	 * buffer space to take it.  The tcbinfo read lock is not
 	 * needed past this point, so drop it if the caller held it.
 	 */
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 	/* Clean receiver SACK report if present */
 	if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks)
 		tcp_clean_sackreport(tp);
 	TCPSTAT_INC(tcps_preddat);
 	tp->rcv_nxt += tlen;
 	/*
 	 * Pull snd_wl1 up to prevent seq wrap relative to
 	 * th_seq.
 	 */
 	tp->snd_wl1 = th->th_seq;
 	/*
 	 * Pull rcv_up up to prevent seq wrap relative to
 	 * rcv_nxt.
 	 */
 	tp->rcv_up = tp->rcv_nxt;
 	TCPSTAT_ADD(tcps_rcvbyte, tlen);
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_INPUT, ostate, tp,
 			  (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
 	/*
 	 * Automatic sizing of receive socket buffer.  Often the send
 	 * buffer size is not optimally adjusted to the actual network
 	 * conditions at hand (delay bandwidth product).  Setting the
 	 * buffer size too small limits throughput on links with high
 	 * bandwidth and high delay (eg. trans-continental/oceanic links).
 	 *
 	 * On the receive side the socket buffer memory is only rarely
 	 * used to any significant extent.  This allows us to be much
 	 * more aggressive in scaling the receive socket buffer.  For
 	 * the case that the buffer space is actually used to a large
 	 * extent and we run out of kernel memory we can simply drop
 	 * the new segments; TCP on the sender will just retransmit it
 	 * later.  Setting the buffer size too big may only consume too
 	 * much kernel memory if the application doesn't read() from
 	 * the socket or packet loss or reordering makes use of the
 	 * reassembly queue.
 	 *
 	 * The criteria to step up the receive buffer one notch are:
 	 *  1. Application has not set receive buffer size with
 	 *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
 	 *  2. the number of bytes received during the time it takes
 	 *     one timestamp to be reflected back to us (the RTT);
 	 *  3. received bytes per RTT is within seven eighth of the
 	 *     current socket buffer size;
 	 *  4. receive buffer size has not hit maximal automatic size;
 	 *
 	 * This algorithm does one step per RTT at most and only if
 	 * we receive a bulk stream w/o packet losses or reorderings.
 	 * Shrinking the buffer during idle times is not necessary as
 	 * it doesn't consume any memory when idle.
 	 *
 	 * TODO: Only step up if the application is actually serving
 	 * the buffer to better manage the socket buffer resources.
 	 */
 	if (V_tcp_do_autorcvbuf &&
 	    (to->to_flags & TOF_TS) &&
 	    to->to_tsecr &&
 	    (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
 		if (TSTMP_GT(to->to_tsecr, tp->rfbuf_ts) &&
 		    to->to_tsecr - tp->rfbuf_ts < hz) {
 			if (tp->rfbuf_cnt >
 			    (so->so_rcv.sb_hiwat / 8 * 7) &&
 			    so->so_rcv.sb_hiwat <
 			    V_tcp_autorcvbuf_max) {
 				newsize =
 					min(so->so_rcv.sb_hiwat +
 					    V_tcp_autorcvbuf_inc,
 					    V_tcp_autorcvbuf_max);
 			}
 			/* Start over with next RTT. */
 			tp->rfbuf_ts = 0;
 			tp->rfbuf_cnt = 0;
 		} else
 			tp->rfbuf_cnt += tlen;	/* add up */
 	}
 
 	/* Add data to socket buffer. */
 	SOCKBUF_LOCK(&so->so_rcv);
 	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		/* The socket can no longer receive; discard the segment. */
 		m_freem(m);
 	} else {
 		/*
 		 * Set new socket buffer size.
 		 * Give up when limit is reached: if the reservation
 		 * fails, turn automatic sizing off for this buffer.
 		 */
 		if (newsize != 0 &&
 		    !sbreserve_locked(&so->so_rcv, newsize, so, NULL))
 			so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
 		sbappendstream_locked(&so->so_rcv, m, 0);
 	}
 	/* NB: sorwakeup_locked() does an implicit unlock. */
 	sorwakeup_locked(so);
 	if (DELAY_ACK(tp, tlen)) {
 		/* The delayed-ACK timer is armed at the end of the function. */
 		tp->t_flags |= TF_DELACK;
 	} else {
 		tp->t_flags |= TF_ACKNOW;
 		(void) tcp_output(tp);
 	}
 	KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
 					    __func__, ti_locked));
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Convert a pending TF_DELACK request into a running
 	 * delayed-ACK timer, then release the inpcb lock.
 	 */
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 }
 
 /*
  * The slow path is a clone of the long part of
  * tcp_do_segment that follows all the fast-path checks.
  * It is used here by two different callers: the fast/slow
  * one and the fastack-only one.
  */
 static void
 tcp_do_slowpath(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen, 
 		int ti_locked, u_long tiwin, int thflags)
 {
 	int  acked, ourfinisacked, needoutput = 0;
 	int rstreason, todrop, win;
 	char *s;
 	struct in_conninfo *inc;
 	struct mbuf *mfree = NULL;
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 	/*
 	 * Calculate amount of space in receive window,
 	 * and then do TCP input processing.
 	 * Receive window is amount of space in rcv queue,
 	 * but not less than advertised window.
 	 */
 	inc = &tp->t_inpcb->inp_inc;
 	win = sbspace(&so->so_rcv);
 	if (win < 0)
 		win = 0;
 	tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
 
 	/* Reset receive buffer auto scaling when not in bulk receive mode. */
 	tp->rfbuf_ts = 0;
 	tp->rfbuf_cnt = 0;
 
 	switch (tp->t_state) {
 
 	/*
 	 * If the state is SYN_RECEIVED:
 	 *	if seg contains an ACK, but not for our SYN/ACK, send a RST.
 	 */
 	case TCPS_SYN_RECEIVED:
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->snd_una) ||
 		     SEQ_GT(th->th_ack, tp->snd_max))) {
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 		}
 		break;
 
 	/*
 	 * If the state is SYN_SENT:
 	 *	if seg contains an ACK, but not for our SYN, drop the input.
 	 *	if seg contains a RST, then drop the connection.
 	 *	if seg does not contain SYN, then drop it.
 	 * Otherwise this is an acceptable SYN segment
 	 *	initialize tp->rcv_nxt and tp->irs
 	 *	if seg contains ack then advance tp->snd_una
 	 *	if seg contains an ECE and ECN support is enabled, the stream
 	 *	    is ECN capable.
 	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
 	 *	arrange for segment to be acked (eventually)
 	 *	continue processing rest of data/controls, beginning with URG
 	 */
 	case TCPS_SYN_SENT:
 		if ((thflags & TH_ACK) &&
 		    (SEQ_LEQ(th->th_ack, tp->iss) ||
 		     SEQ_GT(th->th_ack, tp->snd_max))) {
 			rstreason = BANDLIM_UNLIMITED;
 			goto dropwithreset;
 		}
 		if ((thflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) {
 			TCP_PROBE5(connect__refused, NULL, tp,
 			    mtod(m, const char *), tp, th);
 			tp = tcp_drop(tp, ECONNREFUSED);
 		}
 		if (thflags & TH_RST)
 			goto drop;
 		if (!(thflags & TH_SYN))
 			goto drop;
 
 		tp->irs = th->th_seq;
 		tcp_rcvseqinit(tp);
 		if (thflags & TH_ACK) {
 			TCPSTAT_INC(tcps_connects);
 			soisconnected(so);
 #ifdef MAC
 			mac_socketpeer_set_from_mbuf(m, so);
 #endif
 			/* Do window scaling on this connection? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 			}
 			tp->rcv_adv += imin(tp->rcv_wnd,
 			    TCP_MAXWIN << tp->rcv_scale);
 			tp->snd_una++;		/* SYN is acked */
 			/*
 			 * If there's data, delay ACK; if there's also a FIN
 			 * ACKNOW will be turned on later.
 			 */
 			if (DELAY_ACK(tp, tlen) && tlen != 0)
 				tcp_timer_activate(tp, TT_DELACK,
 				    tcp_delacktime);
 			else
 				tp->t_flags |= TF_ACKNOW;
 
 			if ((thflags & TH_ECE) && V_tcp_do_ecn) {
 				tp->t_flags |= TF_ECN_PERMIT;
 				TCPSTAT_INC(tcps_ecn_shs);
 			}
 			
 			/*
 			 * Received <SYN,ACK> in SYN_SENT[*] state.
 			 * Transitions:
 			 *	SYN_SENT  --> ESTABLISHED
 			 *	SYN_SENT* --> FIN_WAIT_1
 			 */
 			tp->t_starttime = ticks;
 			if (tp->t_flags & TF_NEEDFIN) {
 				tcp_state_change(tp, TCPS_FIN_WAIT_1);
 				tp->t_flags &= ~TF_NEEDFIN;
 				thflags &= ~TH_SYN;
 			} else {
 				tcp_state_change(tp, TCPS_ESTABLISHED);
 				TCP_PROBE5(connect__established, NULL, tp,
 				    mtod(m, const char *), tp, th);
 				cc_conn_init(tp);
 				tcp_timer_activate(tp, TT_KEEP,
 				    TP_KEEPIDLE(tp));
 			}
 		} else {
 			/*
 			 * Received initial SYN in SYN-SENT[*] state =>
 			 * simultaneous open.
 			 * If it succeeds, connection is half-synchronized.
 			 * Otherwise, do 3-way handshake:
 			 *        SYN-SENT -> SYN-RECEIVED
 			 *        SYN-SENT* -> SYN-RECEIVED*
 			 */
 			tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			tcp_state_change(tp, TCPS_SYN_RECEIVED);
 		}
 
 		KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
 		    "ti_locked %d", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		/*
 		 * Advance th->th_seq to correspond to first data byte.
 		 * If data, trim to stay within window,
 		 * dropping FIN if necessary.
 		 */
 		th->th_seq++;
 		if (tlen > tp->rcv_wnd) {
 			todrop = tlen - tp->rcv_wnd;
 			m_adj(m, -todrop);
 			tlen = tp->rcv_wnd;
 			thflags &= ~TH_FIN;
 			TCPSTAT_INC(tcps_rcvpackafterwin);
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
 		}
 		tp->snd_wl1 = th->th_seq - 1;
 		tp->rcv_up = th->th_seq;
 		/*
 		 * Client side of transaction: already sent SYN and data.
 		 * If the remote host used T/TCP to validate the SYN,
 		 * our data will be ACK'd; if so, enter normal data segment
 		 * processing in the middle of step 5, ack processing.
 		 * Otherwise, goto step 6.
 		 */
 		if (thflags & TH_ACK)
 			goto process_ACK;
 
 		goto step6;
 
 	/*
 	 * If the state is LAST_ACK or CLOSING or TIME_WAIT:
 	 *      do normal processing.
 	 *
 	 * NB: Leftover from RFC1644 T/TCP.  Cases to be reused later.
 	 */
 	case TCPS_LAST_ACK:
 	case TCPS_CLOSING:
 		break;  /* continue normal processing */
 	}
 
 	/*
 	 * States other than LISTEN or SYN_SENT.
 	 * First check the RST flag and sequence number since reset segments
 	 * are exempt from the timestamp and connection count tests.  This
 	 * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix
 	 * below which allowed reset segments in half the sequence space
 	 * to fall though and be processed (which gives forged reset
 	 * segments with a random sequence number a 50 percent chance of
 	 * killing a connection).
 	 * Then check timestamp, if present.
 	 * Then check the connection count, if present.
 	 * Then check that at least some bytes of segment are within
 	 * receive window.  If segment begins before rcv_nxt,
 	 * drop leading data (and SYN); if nothing left, just ack.
 	 */
 	if (thflags & TH_RST) {
 		/*
 		 * RFC5961 Section 3.2
 		 *
 		 * - RST drops connection only if SEG.SEQ == RCV.NXT.
 		 * - If RST is in window, we send challenge ACK.
 		 *
 		 * Note: to take into account delayed ACKs, we should
 		 *   test against last_ack_sent instead of rcv_nxt.
 		 * Note 2: we handle special case of closed window, not
 		 *   covered by the RFC.
 		 */
 		if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
 		    (tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			KASSERT(ti_locked == TI_RLOCKED,
 			    ("%s: TH_RST ti_locked %d, th %p tp %p",
 			    __func__, ti_locked, th, tp));
 			KASSERT(tp->t_state != TCPS_SYN_SENT,
 			    ("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
 			    __func__, th, tp));
 
 			if (V_tcp_insecure_rst ||
 			    tp->last_ack_sent == th->th_seq) {
 				TCPSTAT_INC(tcps_drops);
 				/* Drop the connection. */
 				switch (tp->t_state) {
 				case TCPS_SYN_RECEIVED:
 					so->so_error = ECONNREFUSED;
 					goto close;
 				case TCPS_ESTABLISHED:
 				case TCPS_FIN_WAIT_1:
 				case TCPS_FIN_WAIT_2:
 				case TCPS_CLOSE_WAIT:
 					so->so_error = ECONNRESET;
 				close:
 					tcp_state_change(tp, TCPS_CLOSED);
 					/* FALLTHROUGH */
 				default:
 					tp = tcp_close(tp);
 				}
 			} else {
 				TCPSTAT_INC(tcps_badrst);
 				/* Send challenge ACK. */
 				tcp_respond(tp, mtod(m, void *), th, m,
 				    tp->rcv_nxt, tp->snd_nxt, TH_ACK);
 				tp->last_ack_sent = tp->rcv_nxt;
 				m = NULL;
 			}
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC5961 Section 4.2
 	 * Send challenge ACK for any SYN in synchronized state.
 	 */
 	if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
 		KASSERT(ti_locked == TI_RLOCKED,
 		    ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 		TCPSTAT_INC(tcps_badsyn);
 		if (V_tcp_insecure_syn &&
 		    SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
 		    SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
 			tp = tcp_drop(tp, ECONNRESET);
 			rstreason = BANDLIM_UNLIMITED;
 		} else {
 			/* Send challenge ACK. */
 			tcp_respond(tp, mtod(m, void *), th, m, tp->rcv_nxt,
 			    tp->snd_nxt, TH_ACK);
 			tp->last_ack_sent = tp->rcv_nxt;
 			m = NULL;
 		}
 		goto drop;
 	}
 
 	/*
 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
 	 * and it's less than ts_recent, drop it.
 	 */
 	if ((to->to_flags & TOF_TS) != 0 && tp->ts_recent &&
 	    TSTMP_LT(to->to_tsval, tp->ts_recent)) {
 
 		/* Check to see if ts_recent is over 24 days old.  */
 		if (tcp_ts_getticks() - tp->ts_recent_age > TCP_PAWS_IDLE) {
 			/*
 			 * Invalidate ts_recent.  If this segment updates
 			 * ts_recent, the age will be reset later and ts_recent
 			 * will get a valid value.  If it does not, setting
 			 * ts_recent to zero will at least satisfy the
 			 * requirement that zero be placed in the timestamp
 			 * echo reply when ts_recent isn't valid.  The
 			 * age isn't reset until we get a valid ts_recent
 			 * because we don't want out-of-order segments to be
 			 * dropped when ts_recent is old.
 			 */
 			tp->ts_recent = 0;
 		} else {
 			TCPSTAT_INC(tcps_rcvduppack);
 			TCPSTAT_ADD(tcps_rcvdupbyte, tlen);
 			TCPSTAT_INC(tcps_pawsdrop);
 			if (tlen)
 				goto dropafterack;
 			goto drop;
 		}
 	}
 
 	/*
 	 * In the SYN-RECEIVED state, validate that the packet belongs to
 	 * this connection before trimming the data to fit the receive
 	 * window.  Check the sequence number versus IRS since we know
 	 * the sequence numbers haven't wrapped.  This is a partial fix
 	 * for the "LAND" DoS attack.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 
 	todrop = tp->rcv_nxt - th->th_seq;
 	if (todrop > 0) {
 		if (thflags & TH_SYN) {
 			thflags &= ~TH_SYN;
 			th->th_seq++;
 			if (th->th_urp > 1)
 				th->th_urp--;
 			else
 				thflags &= ~TH_URG;
 			todrop--;
 		}
 		/*
 		 * Following if statement from Stevens, vol. 2, p. 960.
 		 */
 		if (todrop > tlen
 		    || (todrop == tlen && (thflags & TH_FIN) == 0)) {
 			/*
 			 * Any valid FIN must be to the left of the window.
 			 * At this point the FIN must be a duplicate or out
 			 * of sequence; drop it.
 			 */
 			thflags &= ~TH_FIN;
 
 			/*
 			 * Send an ACK to resynchronize and drop any data.
 			 * But keep on processing for RST or ACK.
 			 */
 			tp->t_flags |= TF_ACKNOW;
 			todrop = tlen;
 			TCPSTAT_INC(tcps_rcvduppack);
 			TCPSTAT_ADD(tcps_rcvdupbyte, todrop);
 		} else {
 			TCPSTAT_INC(tcps_rcvpartduppack);
 			TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
 		}
 		drop_hdrlen += todrop;	/* drop from the top afterwards */
 		th->th_seq += todrop;
 		tlen -= todrop;
 		if (th->th_urp > todrop)
 			th->th_urp -= todrop;
 		else {
 			thflags &= ~TH_URG;
 			th->th_urp = 0;
 		}
 	}
 
 	/*
 	 * If new data are received on a connection after the
 	 * user processes are gone, then RST the other end.
 	 */
 	if ((so->so_state & SS_NOFDREF) &&
 	    tp->t_state > TCPS_CLOSE_WAIT && tlen) {
 		KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
 		    "CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
 			    "after socket was closed, "
 			    "sending RST and removing tcpcb\n",
 			    s, __func__, tcpstates[tp->t_state], tlen);
 			free(s, M_TCPLOG);
 		}
 		tp = tcp_close(tp);
 		TCPSTAT_INC(tcps_rcvafterclose);
 		rstreason = BANDLIM_UNLIMITED;
 		goto dropwithreset;
 	}
 
 	/*
 	 * If segment ends after window, drop trailing data
 	 * (and PUSH and FIN); if nothing left, just ACK.
 	 */
 	todrop = (th->th_seq + tlen) - (tp->rcv_nxt + tp->rcv_wnd);
 	if (todrop > 0) {
 		TCPSTAT_INC(tcps_rcvpackafterwin);
 		if (todrop >= tlen) {
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, tlen);
 			/*
 			 * If window is closed can only take segments at
 			 * window edge, and have to drop data and PUSH from
 			 * incoming segments.  Continue processing, but
 			 * remember to ack.  Otherwise, drop segment
 			 * and ack.
 			 */
 			if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
 				tp->t_flags |= TF_ACKNOW;
 				TCPSTAT_INC(tcps_rcvwinprobe);
 			} else
 				goto dropafterack;
 		} else
 			TCPSTAT_ADD(tcps_rcvbyteafterwin, todrop);
 		m_adj(m, -todrop);
 		tlen -= todrop;
 		thflags &= ~(TH_PUSH|TH_FIN);
 	}
 
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record its timestamp.
 	 * NOTE: 
 	 * 1) That the test incorporates suggestions from the latest
 	 *    proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 * 2) That updating only on newer timestamps interferes with
 	 *    our earlier PAWS tests, so this check should be solely
 	 *    predicated on the sequence space of this segment.
 	 * 3) That we modify the segment boundary check to be 
 	 *        Last.ACK.Sent <= SEG.SEQ + SEG.Len  
 	 *    instead of RFC1323's
 	 *        Last.ACK.Sent < SEG.SEQ + SEG.Len,
 	 *    This modified check allows us to overcome RFC1323's
 	 *    limitations as described in Stevens TCP/IP Illustrated
 	 *    Vol. 2 p.869. In such cases, we can still calculate the
 	 *    RTT correctly when RCV.NXT == Last.ACK.Sent.
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
 	    SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
 		((thflags & (TH_SYN|TH_FIN)) != 0))) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to->to_tsval;
 	}
 
 	/*
 	 * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN
 	 * flag is on (half-synchronized state), then queue data for
 	 * later processing; else drop segment and return.
 	 */
 	if ((thflags & TH_ACK) == 0) {
 		if (tp->t_state == TCPS_SYN_RECEIVED ||
 		    (tp->t_flags & TF_NEEDSYN))
 			goto step6;
 		else if (tp->t_flags & TF_ACKNOW)
 			goto dropafterack;
 		else
 			goto drop;
 	}
 
 	/*
 	 * Ack processing.
 	 */
 	switch (tp->t_state) {
 
 	/*
 	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
 	 * ESTABLISHED state and continue processing.
 	 * The ACK was checked above.
 	 */
 	case TCPS_SYN_RECEIVED:
 
 		TCPSTAT_INC(tcps_connects);
 		soisconnected(so);
 		/* Do window scaling? */
 		if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 			(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 			tp->rcv_scale = tp->request_r_scale;
 			tp->snd_wnd = tiwin;
 		}
 		/*
 		 * Make transitions:
 		 *      SYN-RECEIVED  -> ESTABLISHED
 		 *      SYN-RECEIVED* -> FIN-WAIT-1
 		 */
 		tp->t_starttime = ticks;
 		if (tp->t_flags & TF_NEEDFIN) {
 			tcp_state_change(tp, TCPS_FIN_WAIT_1);
 			tp->t_flags &= ~TF_NEEDFIN;
 		} else {
 			tcp_state_change(tp, TCPS_ESTABLISHED);
 			TCP_PROBE5(accept__established, NULL, tp,
 			    mtod(m, const char *), tp, th);
 			cc_conn_init(tp);
 			tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 		}
 		/*
 		 * If segment contains data or ACK, will call tcp_reass()
 		 * later; if not, do so now to pass queued data to user.
 		 */
 		if (tlen == 0 && (thflags & TH_FIN) == 0)
 			(void) tcp_reass(tp, (struct tcphdr *)0, 0,
 			    (struct mbuf *)0);
 		tp->snd_wl1 = th->th_seq - 1;
 		/* FALLTHROUGH */
 
 	/*
 	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
 	 * ACKs.  If the ack is in the range
 	 *	tp->snd_una < th->th_ack <= tp->snd_max
 	 * then advance tp->snd_una to th->th_ack and drop
 	 * data from the retransmission queue.  If this ACK reflects
 	 * more up to date window information we update our window information.
 	 */
 	case TCPS_ESTABLISHED:
 	case TCPS_FIN_WAIT_1:
 	case TCPS_FIN_WAIT_2:
 	case TCPS_CLOSE_WAIT:
 	case TCPS_CLOSING:
 	case TCPS_LAST_ACK:
 		if (SEQ_GT(th->th_ack, tp->snd_max)) {
 			TCPSTAT_INC(tcps_rcvacktoomuch);
 			goto dropafterack;
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    ((to->to_flags & TOF_SACK) ||
 		     !TAILQ_EMPTY(&tp->snd_holes)))
 			tcp_sack_doack(tp, to, th->th_ack);
 		else
 			/*
 			 * Reset the value so that previous (valid) value
 			 * from the last ack with SACK doesn't get used.
 			 */
 			tp->sackhint.sacked_bytes = 0;
 
 		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 		hhook_run_tcp_est_in(tp, th, to);
 
 		if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
 			if (tlen == 0 && tiwin == tp->snd_wnd) {
 				/*
 				 * If this is the first time we've seen a
 				 * FIN from the remote, this is not a
 				 * duplicate and it needs to be processed
 				 * normally.  This happens during a
 				 * simultaneous close.
 				 */
 				if ((thflags & TH_FIN) &&
 				    (TCPS_HAVERCVDFIN(tp->t_state) == 0)) {
 					tp->t_dupacks = 0;
 					break;
 				}
 				TCPSTAT_INC(tcps_rcvdupack);
 				/*
 				 * If we have outstanding data (other than
 				 * a window probe), this is a completely
 				 * duplicate ack (ie, window info didn't
 				 * change and FIN isn't set),
 				 * the ack is the biggest we've
 				 * seen and we've seen exactly our rexmt
 				 * threshold of them, assume a packet
 				 * has been dropped and retransmit it.
 				 * Kludge snd_nxt & the congestion
 				 * window so we send only this one
 				 * packet.
 				 *
 				 * We know we're losing at the current
 				 * window size so do congestion avoidance
 				 * (set ssthresh to half the current window
 				 * and pull our congestion window back to
 				 * the new ssthresh).
 				 *
 				 * Dup acks mean that packets have left the
 				 * network (they're now cached at the receiver)
 				 * so bump cwnd by the amount in the receiver
 				 * to keep a constant cwnd packets in the
 				 * network.
 				 *
 				 * When using TCP ECN, notify the peer that
 				 * we reduced the cwnd.
 				 */
 				if (!tcp_timer_active(tp, TT_REXMT) ||
 				    th->th_ack != tp->snd_una)
 					tp->t_dupacks = 0;
 				else if (++tp->t_dupacks > tcprexmtthresh ||
 				     IN_FASTRECOVERY(tp->t_flags)) {
 					cc_ack_received(tp, th, CC_DUPACK);
 					if ((tp->t_flags & TF_SACK_PERMIT) &&
 					    IN_FASTRECOVERY(tp->t_flags)) {
 						int awnd;
 						
 						/*
 						 * Compute the amount of data in flight first.
 						 * We can inject new data into the pipe iff 
 						 * we have less than 1/2 the original window's
 						 * worth of data in flight.
 						 */
 						if (V_tcp_do_rfc6675_pipe)
 							awnd = tcp_compute_pipe(tp);
 						else
 							awnd = (tp->snd_nxt - tp->snd_fack) +
 								tp->sackhint.sack_bytes_rexmit;
 
 						if (awnd < tp->snd_ssthresh) {
 							tp->snd_cwnd += tp->t_maxseg;
 							if (tp->snd_cwnd > tp->snd_ssthresh)
 								tp->snd_cwnd = tp->snd_ssthresh;
 						}
 					} else
 						tp->snd_cwnd += tp->t_maxseg;
 					(void) tp->t_fb->tfb_tcp_output(tp);
 					goto drop;
 				} else if (tp->t_dupacks == tcprexmtthresh) {
 					tcp_seq onxt = tp->snd_nxt;
 
 					/*
 					 * If we're doing sack, check to
 					 * see if we're already in sack
 					 * recovery. If we're not doing sack,
 					 * check to see if we're in newreno
 					 * recovery.
 					 */
 					if (tp->t_flags & TF_SACK_PERMIT) {
 						if (IN_FASTRECOVERY(tp->t_flags)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					} else {
 						if (SEQ_LEQ(th->th_ack,
 						    tp->snd_recover)) {
 							tp->t_dupacks = 0;
 							break;
 						}
 					}
 					/* Congestion signal before ack. */
 					cc_cong_signal(tp, th, CC_NDUPACK);
 					cc_ack_received(tp, th, CC_DUPACK);
 					tcp_timer_activate(tp, TT_REXMT, 0);
 					tp->t_rtttime = 0;
 					if (tp->t_flags & TF_SACK_PERMIT) {
 						TCPSTAT_INC(
 						    tcps_sack_recovery_episode);
 						tp->sack_newdata = tp->snd_nxt;
 						tp->snd_cwnd = tp->t_maxseg;
 						(void) tp->t_fb->tfb_tcp_output(tp);
 						goto drop;
 					}
 					tp->snd_nxt = th->th_ack;
 					tp->snd_cwnd = tp->t_maxseg;
 					(void) tp->t_fb->tfb_tcp_output(tp);
 					KASSERT(tp->snd_limited <= 2,
 					    ("%s: tp->snd_limited too big",
 					    __func__));
 					tp->snd_cwnd = tp->snd_ssthresh +
 					     tp->t_maxseg *
 					     (tp->t_dupacks - tp->snd_limited);
 					if (SEQ_GT(onxt, tp->snd_nxt))
 						tp->snd_nxt = onxt;
 					goto drop;
 				} else if (V_tcp_do_rfc3042) {
 					/*
 					 * Process first and second duplicate
 					 * ACKs. Each indicates a segment
 					 * leaving the network, creating room
 					 * for more. Make sure we can send a
 					 * packet on reception of each duplicate
 					 * ACK by increasing snd_cwnd by one
 					 * segment. Restore the original
 					 * snd_cwnd after packet transmission.
 					 */
 					cc_ack_received(tp, th, CC_DUPACK);
 					u_long oldcwnd = tp->snd_cwnd;
 					tcp_seq oldsndmax = tp->snd_max;
 					u_int sent;
 					int avail;
 
 					KASSERT(tp->t_dupacks == 1 ||
 					    tp->t_dupacks == 2,
 					    ("%s: dupacks not 1 or 2",
 					    __func__));
 					if (tp->t_dupacks == 1)
 						tp->snd_limited = 0;
 					tp->snd_cwnd =
 					    (tp->snd_nxt - tp->snd_una) +
 					    (tp->t_dupacks - tp->snd_limited) *
 					    tp->t_maxseg;
 					/*
 					 * Only call tcp_output when there
 					 * is new data available to be sent.
 					 * Otherwise we would send pure ACKs.
 					 */
 					SOCKBUF_LOCK(&so->so_snd);
 					avail = sbavail(&so->so_snd) -
 					    (tp->snd_nxt - tp->snd_una);
 					SOCKBUF_UNLOCK(&so->so_snd);
 					if (avail > 0)
 						(void) tp->t_fb->tfb_tcp_output(tp);
 					sent = tp->snd_max - oldsndmax;
 					if (sent > tp->t_maxseg) {
 						KASSERT((tp->t_dupacks == 2 &&
 						    tp->snd_limited == 0) ||
 						   (sent == tp->t_maxseg + 1 &&
 						    tp->t_flags & TF_SENTFIN),
 						    ("%s: sent too much",
 						    __func__));
 						tp->snd_limited = 2;
 					} else if (sent > 0)
 						++tp->snd_limited;
 					tp->snd_cwnd = oldcwnd;
 					goto drop;
 				}
 			} else
 				tp->t_dupacks = 0;
 			break;
 		}
 
 		KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
 		    ("%s: th_ack <= snd_una", __func__));
 
 		/*
 		 * If the congestion window was inflated to account
 		 * for the other side's cached packets, retract it.
 		 */
 		if (IN_FASTRECOVERY(tp->t_flags)) {
 			if (SEQ_LT(th->th_ack, tp->snd_recover)) {
 				if (tp->t_flags & TF_SACK_PERMIT)
 					tcp_sack_partialack(tp, th);
 				else
 					tcp_newreno_partial_ack(tp, th);
 			} else
 				cc_post_recovery(tp, th);
 		}
 		tp->t_dupacks = 0;
 		/*
 		 * If we reach this point, ACK is not a duplicate,
 		 *     i.e., it ACKs something we sent.
 		 */
 		if (tp->t_flags & TF_NEEDSYN) {
 			/*
 			 * T/TCP: Connection was half-synchronized, and our
 			 * SYN has been ACK'd (so connection is now fully
 			 * synchronized).  Go to non-starred state,
 			 * increment snd_una for ACK of SYN, and check if
 			 * we can do window scaling.
 			 */
 			tp->t_flags &= ~TF_NEEDSYN;
 			tp->snd_una++;
 			/* Do window scaling? */
 			if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
 				(TF_RCVD_SCALE|TF_REQ_SCALE)) {
 				tp->rcv_scale = tp->request_r_scale;
 				/* Send window already scaled. */
 			}
 		}
 
 process_ACK:
 		INP_WLOCK_ASSERT(tp->t_inpcb);
 
 		acked = BYTES_THIS_ACK(tp, th);
 		TCPSTAT_INC(tcps_rcvackpack);
 		TCPSTAT_ADD(tcps_rcvackbyte, acked);
 
 		/*
 		 * If we just performed our first retransmit, and the ACK
 		 * arrives within our recovery window, then it was a mistake
 		 * to do the retransmit in the first place.  Recover our
 		 * original cwnd and ssthresh, and proceed to transmit where
 		 * we left off.
 		 */
 		if (tp->t_rxtshift == 1 && tp->t_flags & TF_PREVVALID &&
 		    (int)(ticks - tp->t_badrxtwin) < 0)
 			cc_cong_signal(tp, th, CC_RTO_ERR);
 
 		/*
 		 * If we have a timestamp reply, update smoothed
 		 * round trip time.  If no timestamp is present but
 		 * transmit timer is running and timed sequence
 		 * number was acked, update smoothed round trip time.
 		 * Since we now have an rtt measurement, cancel the
 		 * timer backoff (cf., Phil Karn's retransmit alg.).
 		 * Recompute the initial retransmit timer.
 		 *
 		 * Some boxes send broken timestamp replies
 		 * during the SYN+ACK phase, ignore
 		 * timestamps of 0 or we could calculate a
 		 * huge RTT and blow up the retransmit timer.
 		 */
 		if ((to->to_flags & TOF_TS) != 0 && to->to_tsecr) {
 			u_int t;
 
 			t = tcp_ts_getticks() - to->to_tsecr;
 			if (!tp->t_rttlow || tp->t_rttlow > t)
 				tp->t_rttlow = t;
 			tcp_xmit_timer(tp, TCP_TS_TO_TICKS(t) + 1);
 		} else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
 			if (!tp->t_rttlow || tp->t_rttlow > ticks - tp->t_rtttime)
 				tp->t_rttlow = ticks - tp->t_rtttime;
 			tcp_xmit_timer(tp, ticks - tp->t_rtttime);
 		}
 
 		/*
 		 * If all outstanding data is acked, stop retransmit
 		 * timer and remember to restart (more output or persist).
 		 * If there is more data to be acked, restart retransmit
 		 * timer, using current (possibly backed-off) value.
 		 */
 		if (th->th_ack == tp->snd_max) {
 			tcp_timer_activate(tp, TT_REXMT, 0);
 			needoutput = 1;
 		} else if (!tcp_timer_active(tp, TT_PERSIST))
 			tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
 
 		/*
 		 * If no data (only SYN) was ACK'd,
 		 *    skip rest of ACK processing.
 		 */
 		if (acked == 0)
 			goto step6;
 
 		/*
 		 * Let the congestion control algorithm update congestion
 		 * control related information. This typically means increasing
 		 * the congestion window.
 		 */
 		cc_ack_received(tp, th, CC_ACK);
 
 		SOCKBUF_LOCK(&so->so_snd);
 		if (acked > sbavail(&so->so_snd)) {
 			tp->snd_wnd -= sbavail(&so->so_snd);
 			mfree = sbcut_locked(&so->so_snd,
 			    (int)sbavail(&so->so_snd));
 			ourfinisacked = 1;
 		} else {
 			mfree = sbcut_locked(&so->so_snd, acked);
 			tp->snd_wnd -= acked;
 			ourfinisacked = 0;
 		}
 		/* NB: sowwakeup_locked() does an implicit unlock. */
 		sowwakeup_locked(so);
 		m_freem(mfree);
 		/* Detect una wraparound. */
 		if (!IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
 		/* XXXLAS: Can this be moved up into cc_post_recovery? */
 		if (IN_RECOVERY(tp->t_flags) &&
 		    SEQ_GEQ(th->th_ack, tp->snd_recover)) {
 			EXIT_RECOVERY(tp->t_flags);
 		}
 		tp->snd_una = th->th_ack;
 		if (tp->t_flags & TF_SACK_PERMIT) {
 			if (SEQ_GT(tp->snd_una, tp->snd_recover))
 				tp->snd_recover = tp->snd_una;
 		}
 		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
 			tp->snd_nxt = tp->snd_una;
 
 		switch (tp->t_state) {
 
 		/*
 		 * In FIN_WAIT_1 STATE in addition to the processing
 		 * for the ESTABLISHED state if our FIN is now acknowledged
 		 * then enter FIN_WAIT_2.
 		 */
 		case TCPS_FIN_WAIT_1:
 			if (ourfinisacked) {
 				/*
 				 * If we can't receive any more
 				 * data, then closing user can proceed.
 				 * Starting the timer is contrary to the
 				 * specification, but if we don't get a FIN
 				 * we'll hang forever.
 				 *
 				 * XXXjl:
 				 * we should release the tp also, and use a
 				 * compressed state.
 				 */
 				if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 					soisdisconnected(so);
 					tcp_timer_activate(tp, TT_2MSL,
 					    (tcp_fast_finwait2_recycle ?
 					    tcp_finwait2_timeout :
 					    TP_MAXIDLE(tp)));
 				}
 				tcp_state_change(tp, TCPS_FIN_WAIT_2);
 			}
 			break;
 
 		/*
 		 * In CLOSING STATE in addition to the processing for
 		 * the ESTABLISHED state if the ACK acknowledges our FIN
 		 * then enter the TIME-WAIT state, otherwise ignore
 		 * the segment.
 		 */
 		case TCPS_CLOSING:
 			if (ourfinisacked) {
 				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 				tcp_twstart(tp);
 				INP_INFO_RUNLOCK(&V_tcbinfo);
 				m_freem(m);
 				return;
 			}
 			break;
 
 		/*
 		 * In LAST_ACK, we may still be waiting for data to drain
 		 * and/or to be acked, as well as for the ack of our FIN.
 		 * If our FIN is now acknowledged, delete the TCB,
 		 * enter the closed state and return.
 		 */
 		case TCPS_LAST_ACK:
 			if (ourfinisacked) {
 				INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 				tp = tcp_close(tp);
 				goto drop;
 			}
 			break;
 		}
 	}
 
 step6:
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Update window information.
 	 * Don't look at window if no ACK: TAC's send garbage on first SYN.
 	 */
 	if ((thflags & TH_ACK) &&
 	    (SEQ_LT(tp->snd_wl1, th->th_seq) ||
 	    (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
 	     (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
 		/* keep track of pure window updates */
 		if (tlen == 0 &&
 		    tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
 			TCPSTAT_INC(tcps_rcvwinupd);
 		tp->snd_wnd = tiwin;
 		tp->snd_wl1 = th->th_seq;
 		tp->snd_wl2 = th->th_ack;
 		if (tp->snd_wnd > tp->max_sndwnd)
 			tp->max_sndwnd = tp->snd_wnd;
 		needoutput = 1;
 	}
 
 	/*
 	 * Process segments with URG.
 	 */
 	if ((thflags & TH_URG) && th->th_urp &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		/*
 		 * This is a kludge, but if we receive and accept
 		 * random urgent pointers, we'll crash in
 		 * soreceive.  It's hard to imagine someone
 		 * actually wanting to send this much urgent data.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
 		if (th->th_urp + sbavail(&so->so_rcv) > sb_max) {
 			th->th_urp = 0;			/* XXX */
 			thflags &= ~TH_URG;		/* XXX */
 			SOCKBUF_UNLOCK(&so->so_rcv);	/* XXX */
 			goto dodata;			/* XXX */
 		}
 		/*
 		 * If this segment advances the known urgent pointer,
 		 * then mark the data stream.  This should not happen
 		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
 		 * a FIN has been received from the remote side.
 		 * In these states we ignore the URG.
 		 *
 		 * According to RFC961 (Assigned Protocols),
 		 * the urgent pointer points to the last octet
 		 * of urgent data.  We continue, however,
 		 * to consider it to indicate the first octet
 		 * of data past the urgent section as the original
 		 * spec states (in one of two places).
 		 */
 		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
 			tp->rcv_up = th->th_seq + th->th_urp;
 			so->so_oobmark = sbavail(&so->so_rcv) +
 			    (tp->rcv_up - tp->rcv_nxt) - 1;
 			if (so->so_oobmark == 0)
 				so->so_rcv.sb_state |= SBS_RCVATMARK;
 			sohasoutofband(so);
 			tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 		}
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		/*
 		 * Remove out of band data so doesn't get presented to user.
 		 * This can happen independent of advancing the URG pointer,
 		 * but if two URG's are pending at once, some out-of-band
 		 * data may creep in... ick.
 		 */
 		if (th->th_urp <= (u_long)tlen &&
 		    !(so->so_options & SO_OOBINLINE)) {
 			/* hdr drop is delayed */
 			tcp_pulloutofband(so, th, m, drop_hdrlen);
 		}
 	} else {
 		/*
 		 * If no out of band data is expected,
 		 * pull receive urgent pointer along
 		 * with the receive window.
 		 */
 		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
 			tp->rcv_up = tp->rcv_nxt;
 	}
 dodata:							/* XXX */
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/*
 	 * Process the segment text, merging it into the TCP sequencing queue,
 	 * and arranging for acknowledgment of receipt if necessary.
 	 * This process logically involves adjusting tp->rcv_wnd as data
 	 * is presented to the user (this happens in tcp_usrreq.c,
 	 * case PRU_RCVD).  If a FIN has already been received on this
 	 * connection then we just ignore the text.
 	 */
 	if ((tlen || (thflags & TH_FIN)) &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		tcp_seq save_start = th->th_seq;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
 		/*
 		 * Insert segment which includes th into TCP reassembly queue
 		 * with control block tp.  Set thflags to whether reassembly now
 		 * includes a segment with FIN.  This handles the common case
 		 * inline (segment is the next to be received on an established
 		 * connection, and the queue is empty), avoiding linkage into
 		 * and removal from the queue and repetition of various
 		 * conversions.
 		 * Set DELACK for segments received in order, but ack
 		 * immediately when segments are out of order (so
 		 * fast retransmit can work).
 		 */
 		if (th->th_seq == tp->rcv_nxt &&
 		    LIST_EMPTY(&tp->t_segq) &&
 		    TCPS_HAVEESTABLISHED(tp->t_state)) {
 			if (DELAY_ACK(tp, tlen))
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt += tlen;
 			thflags = th->th_flags & TH_FIN;
 			TCPSTAT_INC(tcps_rcvpack);
 			TCPSTAT_ADD(tcps_rcvbyte, tlen);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				m_freem(m);
 			else
 				sbappendstream_locked(&so->so_rcv, m, 0);
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
 		} else {
 			/*
 			 * XXX: Due to the header drop above "th" is
 			 * theoretically invalid by now.  Fortunately
 			 * m_adj() doesn't actually frees any mbufs
 			 * when trimming from the head.
 			 */
 			thflags = tcp_reass(tp, th, &tlen, m);
 			tp->t_flags |= TF_ACKNOW;
 		}
 		if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))
 			tcp_update_sack_list(tp, save_start, save_start + tlen);
 #if 0
 		/*
 		 * Note the amount of data that peer has sent into
 		 * our window, in order to estimate the sender's
 		 * buffer size.
 		 * XXX: Unused.
 		 */
 		if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
 			len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
 		else
 			len = so->so_rcv.sb_hiwat;
 #endif
 	} else {
 		m_freem(m);
 		thflags &= ~TH_FIN;
 	}
 
 	/*
 	 * If FIN is received ACK the FIN and let the user know
 	 * that the connection is closing.
 	 */
 	if (thflags & TH_FIN) {
 		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 			socantrcvmore(so);
 			/*
 			 * If connection is half-synchronized
 			 * (ie NEEDSYN flag on) then delay ACK,
 			 * so it may be piggybacked when SYN is sent.
 			 * Otherwise, since we received a FIN then no
 			 * more input can be expected, send ACK now.
 			 */
 			if (tp->t_flags & TF_NEEDSYN)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
 			tp->rcv_nxt++;
 		}
 		switch (tp->t_state) {
 
 		/*
 		 * In SYN_RECEIVED and ESTABLISHED STATES
 		 * enter the CLOSE_WAIT state.
 		 */
 		case TCPS_SYN_RECEIVED:
 			tp->t_starttime = ticks;
 			/* FALLTHROUGH */
 		case TCPS_ESTABLISHED:
 			tcp_state_change(tp, TCPS_CLOSE_WAIT);
 			break;
 
 		/*
 		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
 		 * enter the CLOSING state.
 		 */
 		case TCPS_FIN_WAIT_1:
 			tcp_state_change(tp, TCPS_CLOSING);
 			break;
 
 		/*
 		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
 		 * starting the time-wait timer, turning off the other
 		 * standard timers.
 		 */
 		case TCPS_FIN_WAIT_2:
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
 			    "TCP_FIN_WAIT_2 ti_locked: %d", __func__,
 			    ti_locked));
 
 			tcp_twstart(tp);
 			INP_INFO_RUNLOCK(&V_tcbinfo);
 			return;
 		}
 	}
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 
 	/*
 	 * Return any desired output.
 	 */
 	if (needoutput || (tp->t_flags & TF_ACKNOW))
 		(void) tp->t_fb->tfb_tcp_output(tp);
 
 	KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
 	    __func__, ti_locked));
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 	return;
 
 dropafterack:
 	/*
 	 * Generate an ACK dropping incoming segment if it occupies
 	 * sequence space, where the ACK reflects our state.
 	 *
 	 * We can now skip the test for the RST flag since all
 	 * paths to this code happen after packets containing
 	 * RST have been dropped.
 	 *
 	 * In the SYN-RECEIVED state, don't send an ACK unless the
 	 * segment we received passes the SYN-RECEIVED ACK test.
 	 * If it fails send a RST.  This breaks the loop in the
 	 * "LAND" DoS attack, and also prevents an ACK storm
 	 * between two listening ports that have been sent forged
 	 * SYN segments, each with the source address of the other.
 	 */
 	if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
 	    (SEQ_GT(tp->snd_una, th->th_ack) ||
 	     SEQ_GT(th->th_ack, tp->snd_max)) ) {
 		rstreason = BANDLIM_RST_OPENPORT;
 		goto dropwithreset;
 	}
 #ifdef TCPDEBUG
 	if (so->so_options & SO_DEBUG)
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 	tp->t_flags |= TF_ACKNOW;
 	(void) tp->t_fb->tfb_tcp_output(tp);
 	INP_WUNLOCK(tp->t_inpcb);
 	m_freem(m);
 	return;
 
 dropwithreset:
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 	if (tp != NULL) {
 		tcp_dropwithreset(m, th, tp, tlen, rstreason);
 		INP_WUNLOCK(tp->t_inpcb);
 	} else
 		tcp_dropwithreset(m, th, NULL, tlen, rstreason);
 	return;
 
 drop:
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		ti_locked = TI_UNLOCKED;
 	}
 #ifdef INVARIANTS
 	else
 		INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 #endif
 
 	/*
 	 * Drop space held by incoming segment and return.
 	 */
 #ifdef TCPDEBUG
 	if (tp == NULL || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
 			  &tcp_savetcp, 0);
 #endif
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 	if (tp != NULL)
 		INP_WUNLOCK(tp->t_inpcb);
 	m_freem(m);
 }
 
 
 /*
  * Do fast slow is a combination of the original
  * tcp_do_segment and a split fastpath, one function
  * for the fast-ack which also includes allowing fastpath
  * for window advanced in sequence acks. And also a
  * sub-function that handles the insequence data.
  *
  * The per-segment preamble (lock assertions, keep-alive timer reset,
  * window unscaling, ECN processing and option parsing) is done here;
  * the segment is then handed to exactly one of tcp_do_fastack()
  * (pure ACK), tcp_do_fastnewdata() (in-sequence data) or
  * tcp_do_slowpath() (everything else).
  *
  *	m		mbuf holding the received segment
  *	th		TCP header of the segment
  *	so		socket of the connection
  *	tp		TCP control block; its inpcb must be write-locked
  *	drop_hdrlen	passed through unchanged to the selected handler
  *	tlen		segment data length; zero selects the pure-ACK test
  *	iptos		IP TOS byte; only the ECN bits are examined here
  *	ti_locked	TI_RLOCKED or TI_UNLOCKED (tcbinfo lock state)
  */
 void
 tcp_do_segment_fastslow(struct mbuf *m, struct tcphdr *th, struct socket *so,
 			struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
 			int ti_locked)
 {
 	int thflags;
 	u_long tiwin;
 	char *s;
 	int can_enter;
 	struct in_conninfo *inc;
 	struct tcpopt to;
 
 	thflags = th->th_flags;
 	tp->sackhint.last_sack_ack = 0;
 	inc = &tp->t_inpcb->inp_inc;
 	/*
 	 * If this is either a state-changing packet or current state isn't
 	 * established, we require the tcbinfo lock to be held; the
 	 * assertions below check for the read lock (TI_RLOCKED).  Otherwise,
 	 * we allow the tcbinfo to be either locked or unlocked, as the
 	 * caller may have unnecessarily acquired the lock due to a race.
 	 */
 	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
 	    tp->t_state != TCPS_ESTABLISHED) {
 		KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
 						  "SYN/FIN/RST/!EST", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	} else {
 #ifdef INVARIANTS
 		if (ti_locked == TI_RLOCKED) {
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		} else {
 			KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
 							   "ti_locked: %d", __func__, ti_locked));
 			INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 		}
 #endif
 	}
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
 					    __func__));
 	KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
 						__func__));
 
 	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 * XXX: This should be done after segment
 	 * validation to ignore broken/spoofed segs.
 	 */
 	tp->t_rcvtime = ticks;
 	if (TCPS_HAVEESTABLISHED(tp->t_state))
 		tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 
 	/*
 	 * Unscale the window into a 32-bit value.
 	 * For the SYN_SENT state the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
 
 	/*
 	 * TCP ECN processing.
 	 */
 	if (tp->t_flags & TF_ECN_PERMIT) {
 		/* Peer signalled CWR: stop echoing ECE. */
 		if (thflags & TH_CWR)
 			tp->t_flags &= ~TF_ECN_SND_ECE;
 		switch (iptos & IPTOS_ECN_MASK) {
 		case IPTOS_ECN_CE:
 			tp->t_flags |= TF_ECN_SND_ECE;
 			TCPSTAT_INC(tcps_ecn_ce);
 			break;
 		case IPTOS_ECN_ECT0:
 			TCPSTAT_INC(tcps_ecn_ect0);
 			break;
 		case IPTOS_ECN_ECT1:
 			TCPSTAT_INC(tcps_ecn_ect1);
 			break;
 		}
 		/* Congestion experienced. */
 		if (thflags & TH_ECE) {
 			cc_cong_signal(tp, th, CC_ECN);
 		}
 	}
 
 	/*
 	 * Parse options on any incoming segment.
 	 */
 	tcp_dooptions(&to, (u_char *)(th + 1),
 		      (th->th_off << 2) - sizeof(struct tcphdr),
 		      (thflags & TH_SYN) ? TO_SYN : 0);
 
 	/*
 	 * If echoed timestamp is later than the current time,
 	 * fall back to non RFC1323 RTT calculation.  Normalize
 	 * timestamp if syncookies were used when this connection
 	 * was established.
 	 */
 	if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
 		to.to_tsecr -= tp->ts_offset;
 		if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
 			to.to_tsecr = 0;
 	}
 	/*
 	 * If timestamps were negotiated during SYN/ACK they should
 	 * appear on every segment during this session and vice versa.
 	 * A mismatch is only logged; the segment is still processed.
 	 */
 	if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 	if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 
 	/*
 	 * Process options only when we get SYN/ACK back. The SYN case
 	 * for incoming connections is handled in tcp_syncache.
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 	 * or <SYN,ACK>) segment itself is never scaled.
 	 * XXX this is traditional behavior, may need to be cleaned up.
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 		if ((to.to_flags & TOF_SCALE) &&
 		    (tp->t_flags & TF_REQ_SCALE)) {
 			tp->t_flags |= TF_RCVD_SCALE;
 			tp->snd_scale = to.to_wscale;
 		}
 		/*
 		 * Initial send window.  It will be updated with
 		 * the next incoming segment to the scaled value.
 		 */
 		tp->snd_wnd = th->th_win;
 		if (to.to_flags & TOF_TS) {
 			tp->t_flags |= TF_RCVD_TSTMP;
 			tp->ts_recent = to.to_tsval;
 			tp->ts_recent_age = tcp_ts_getticks();
 		}
 		if (to.to_flags & TOF_MSS)
 			tcp_mss(tp, to.to_mss);
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (to.to_flags & TOF_SACKPERM) == 0)
 			tp->t_flags &= ~TF_SACK_PERMIT;
 	}
 	/*
 	 * can_enter records whether the ack/window state of this segment
 	 * allows it to be considered for header prediction below.
 	 */
 	can_enter = 0;
 	if (__predict_true((tlen == 0))) {
 		/*
 		 * The ack moved forward and we have a window (non-zero)
 		 * <or>
 		 * The ack did not move forward, but the window increased.
 		 */
 		if (__predict_true((SEQ_GT(th->th_ack, tp->snd_una) && tiwin) ||
 				   ((th->th_ack == tp->snd_una) && tiwin && (tiwin > tp->snd_wnd)))) {
 			can_enter = 1;
 		}
 	} else {
 		/*
 		 * Data incoming, use the old entry criteria
 		 * for fast-path with data.
 		 */
 		if ((tiwin && tiwin == tp->snd_wnd)) {
 			can_enter = 1;
 		}
 	}
 	/*
 	 * Header prediction: check for the two common cases
 	 * of a uni-directional data xfer.  If the packet has
 	 * no control flags, is in-sequence, the window didn't
 	 * change and we're not retransmitting, it's a
 	 * candidate.  If the length is zero and the ack moved
 	 * forward, we're the sender side of the xfer.  Just
 	 * free the data acked & wake any higher level process
 	 * that was blocked waiting for space.  If the length
 	 * is non-zero and the ack didn't move, we're the
 	 * receiver side.  If we're getting packets in-order
 	 * (the reassembly queue is empty), add the data to
 	 * the socket buffer and note that we need a delayed ack.
 	 * Make sure that the hidden state-flags are also off.
 	 * Since we check for TCPS_ESTABLISHED first, it can only
 	 * be TF_NEEDSYN.
 	 *
 	 * Note that the window test is the can_enter value computed
 	 * above, which for a zero-length segment also admits an
 	 * increased window rather than only an unchanged one.
 	 */
 	if (__predict_true(tp->t_state == TCPS_ESTABLISHED &&
 	    th->th_seq == tp->rcv_nxt &&
 	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
 	    tp->snd_nxt == tp->snd_max &&
 	    can_enter &&
 	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
 	    LIST_EMPTY(&tp->t_segq) &&
 	    ((to.to_flags & TOF_TS) == 0 ||
 	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)))) {
 		/*
 		 * Pure ACK: not beyond snd_max, not in recovery, and no
 		 * SACK information either in the segment or outstanding.
 		 */
 		if (__predict_true((tlen == 0) &&
 		    (SEQ_LEQ(th->th_ack, tp->snd_max) &&
 		     !IN_RECOVERY(tp->t_flags) &&
 		     (to.to_flags & TOF_SACK) == 0 &&
 		     TAILQ_EMPTY(&tp->snd_holes)))) {
 			/* We are done */
 			tcp_do_fastack(m, th, so, tp, &to, drop_hdrlen, tlen, 
 				       ti_locked, tiwin);
 			return;
 		} else if ((tlen) &&
 			   (th->th_ack == tp->snd_una &&
 			    tlen <= sbspace(&so->so_rcv))) {
 			/* In-sequence data that fits in the receive buffer. */
 			tcp_do_fastnewdata(m, th, so, tp, &to, drop_hdrlen, tlen, 
 					   ti_locked, tiwin);
 			/* We are done */
 			return;
 		}
 	}
 	/* Not a fast-path candidate: full processing. */
 	tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen,
 			ti_locked, tiwin, thflags);
 }
 
 
 /*
  * This subfunction is used to try to highly optimize the
  * fast path. We again allow window updates that are
  * in sequence to remain in the fast-path. We also add
  * in the __predict's to attempt to help the compiler.
  *
  * Returns 0 if the segment can *not* be handled here; in that case
  * nothing has been modified and the caller should push the packet
  * into the slow-path.  Returns 1 if the ACK was fully processed; in
  * that case the tcbinfo read lock (if ti_locked was TI_RLOCKED) has
  * been released, m has been freed and the inpcb has been unlocked.
  *
  *	m		mbuf holding the received segment
  *	th		TCP header of the segment
  *	so		socket of the connection
  *	tp		TCP control block; its inpcb must be write-locked
  *	to		options already parsed by the caller
  *	drop_hdrlen	not used by this function
  *	tlen		not used by this function
  *	ti_locked	TI_RLOCKED or TI_UNLOCKED (tcbinfo lock state)
  *	tiwin		advertised window, already unscaled by the caller
  */
 static int
 tcp_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	       struct tcpcb *tp, struct tcpopt *to, int drop_hdrlen, int tlen,
 	       int ti_locked, u_long tiwin)
 {
 	int acked;
 	int winup_only = 0;
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
 	 * now IPv6.
 	 *
 	 * NOTE(review): nothing in this function fills tcp_saveipgen or
 	 * tcp_savetcp before they are handed to tcp_trace() below --
 	 * confirm whether that is intended.
 	 */
 	u_char tcp_saveipgen[IP6_HDR_LEN];
 	struct tcphdr tcp_savetcp;
 	short ostate = 0;
 #endif
 
 
 	if (__predict_false(SEQ_LEQ(th->th_ack, tp->snd_una))) {
 		/* Old ack, behind (or duplicate to) the last one rcv'd */
 		return (0);
 	}
 	/*
 	 * NOTE(review): th_ack == snd_una has already been rejected by
 	 * the SEQ_LEQ test above, so this test cannot be true as written.
 	 * It is kept unchanged; confirm whether the test above was meant
 	 * to be SEQ_LT so that pure window updates stay in the fast path.
 	 */
 	if (__predict_false(th->th_ack == tp->snd_una) &&
 	    __predict_false(tiwin <= tp->snd_wnd)) {
 		/* duplicate ack <or> a shrinking dup ack with shrinking window */
 		return (0);
 	}
 	if (__predict_false(tiwin == 0)) {
 		/* zero window */
 		return (0);
 	}
 	if (__predict_false(SEQ_GT(th->th_ack, tp->snd_max))) {
 		/* Above what we have sent? */
 		return (0);
 	}
 	if (__predict_false(tp->snd_nxt != tp->snd_max)) {
 		/* We are retransmitting */
 		return (0);
 	}
 	if (__predict_false(tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN))) {
 		/* We need a SYN or a FIN, unlikely.. */
 		return (0);
 	}
 	if ((to->to_flags & TOF_TS) && __predict_false(TSTMP_LT(to->to_tsval, tp->ts_recent))) {
 		/* Timestamp is behind .. old ack with seq wrap? */
 		return (0);
 	}
 	if (__predict_false(IN_RECOVERY(tp->t_flags))) {
 		/* Still recovering */
 		return (0);
 	}
 	if (__predict_false(to->to_flags & TOF_SACK)) {
 		/* Sack included in the ack..  */
 		return (0);
 	}
 	if (!TAILQ_EMPTY(&tp->snd_holes)) {
 		/* We have sack holes on our scoreboard */
 		return (0);
 	}
 	/* Ok if we reach here, we can process a fast-ack */
 
 	/* Did the window get updated? */
 	if (tiwin != tp->snd_wnd) {
 		/* keep track of pure window updates */
 		if (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) {
 			winup_only = 1;
 			TCPSTAT_INC(tcps_rcvwinupd);
 		}
 		tp->snd_wnd = tiwin;
 		tp->snd_wl1 = th->th_seq;
 		if (tp->snd_wnd > tp->max_sndwnd)
 			tp->max_sndwnd = tp->snd_wnd;
 	}
 	/*
 	 * Pull snd_wl2 up to prevent seq wrap relative
 	 * to th_ack.
 	 */
 	tp->snd_wl2 = th->th_ack;
 	/*
 	 * If last ACK falls within this segment's sequence numbers,
 	 * record the timestamp.
 	 * NOTE that the test is modified according to the latest
 	 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
 		tp->ts_recent_age = tcp_ts_getticks();
 		tp->ts_recent = to->to_tsval;
 	}
 	/*
 	 * This is a pure ack for outstanding data.
 	 * The tcbinfo lock is not needed past this point.
 	 */
 	if (ti_locked == TI_RLOCKED) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	}
 	ti_locked = TI_UNLOCKED;
 
 	TCPSTAT_INC(tcps_predack);
 
 	/*
 	 * "bad retransmit" recovery.
 	 */
 	if (tp->t_rxtshift == 1 &&
 	    tp->t_flags & TF_PREVVALID &&
 	    (int)(ticks - tp->t_badrxtwin) < 0) {
 		cc_cong_signal(tp, th, CC_RTO_ERR);
 	}
 
 	/*
 	 * Recalculate the transmit timer / rtt.
 	 *
 	 * Some boxes send broken timestamp replies
 	 * during the SYN+ACK phase, ignore
 	 * timestamps of 0 or we could calculate a
 	 * huge RTT and blow up the retransmit timer.
 	 */
 	if ((to->to_flags & TOF_TS) != 0 &&
 	    to->to_tsecr) {
 		u_int t;
 
 		t = tcp_ts_getticks() - to->to_tsecr;
 		if (!tp->t_rttlow || tp->t_rttlow > t)
 			tp->t_rttlow = t;
 		tcp_xmit_timer(tp,
 			       TCP_TS_TO_TICKS(t) + 1);
 	} else if (tp->t_rtttime &&
 		   SEQ_GT(th->th_ack, tp->t_rtseq)) {
 		if (!tp->t_rttlow ||
 		    tp->t_rttlow > ticks - tp->t_rtttime)
 			tp->t_rttlow = ticks - tp->t_rtttime;
 		tcp_xmit_timer(tp,
 			       ticks - tp->t_rtttime);
 	}
 	if (winup_only == 0) {
 		acked = BYTES_THIS_ACK(tp, th);
 
 		/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
 		hhook_run_tcp_est_in(tp, th, to);
 
 		TCPSTAT_ADD(tcps_rcvackbyte, acked);
 		sbdrop(&so->so_snd, acked);
 		/* Detect una wraparound. */
 		if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
 		    SEQ_LEQ(th->th_ack, tp->snd_recover))
 			tp->snd_recover = th->th_ack - 1;
 
 		/*
 		 * Let the congestion control algorithm update
 		 * congestion control related information. This
 		 * typically means increasing the congestion
 		 * window.
 		 */
 		cc_ack_received(tp, th, CC_ACK);
 
 		tp->snd_una = th->th_ack;
 		tp->t_dupacks = 0;
 		m_freem(m);
 
 		/*
 		 * If all outstanding data are acked, stop
 		 * retransmit timer, otherwise restart timer
 		 * using current (possibly backed-off) value.
 		 * If process is waiting for space,
 		 * wakeup/selwakeup/signal.  If data
 		 * are ready to send, let tcp_output
 		 * decide between more output or persist.
 		 */
 #ifdef TCPDEBUG
 		if (so->so_options & SO_DEBUG)
 			tcp_trace(TA_INPUT, ostate, tp,
 				  (void *)tcp_saveipgen,
 				  &tcp_savetcp, 0);
 #endif
 		if (tp->snd_una == tp->snd_max)
 			tcp_timer_activate(tp, TT_REXMT, 0);
 		else if (!tcp_timer_active(tp, TT_PERSIST))
 			tcp_timer_activate(tp, TT_REXMT,
 					   tp->t_rxtcur);
 		/* Wake up the socket if we have room to write more */
 		sowwakeup(so);
 	} else {
 		/*
 		 * Window update only, just free the mbufs and
 		 * send out whatever we can.
 		 */
 		m_freem(m);
 	}
 	/*
 	 * Send through the connection's function block, as the slow
 	 * path does, instead of calling tcp_output() directly.
 	 */
 	if (sbavail(&so->so_snd))
 		(void) tp->t_fb->tfb_tcp_output(tp);
 	KASSERT(ti_locked == TI_UNLOCKED, ("%s: check_delack ti_locked %d",
 					    __func__, ti_locked));
 	INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	/* Turn a pending delayed-ACK request into a running timer. */
 	if (tp->t_flags & TF_DELACK) {
 		tp->t_flags &= ~TF_DELACK;
 		tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 	}
 	INP_WUNLOCK(tp->t_inpcb);
 	return (1);
 }
 
 /*
  * This tcp-do-segment concentrates on making the fastest
  * ack processing path. It does not have a fast-path for
  * data (it possibly could which would then eliminate the
  * need for fast-slow above). For a content distributor having
  * large outgoing elephants and very very little coming in
  * having no fastpath for data does not really help (since you
  * don't get much data in). The most important thing is
  * processing ack's quickly and getting the rest of the data
  * output to the peer as quickly as possible. This routine
  * seems to be about an overall 3% faster than the old
  * tcp_do_segment and keeps us in the fast-path for packets
  * much more (by allowing window updates to also stay in the fastpath).
  *
  * The per-segment preamble (lock assertions, keep-alive timer reset,
  * window unscaling, ECN processing and option parsing) is done here;
  * a zero-length, in-sequence ACK is then offered to tcp_fastack(),
  * and anything it declines (or that does not qualify) goes to
  * tcp_do_slowpath().
  *
  *	m		mbuf holding the received segment
  *	th		TCP header of the segment
  *	so		socket of the connection
  *	tp		TCP control block; its inpcb must be write-locked
  *	drop_hdrlen	passed through unchanged to the selected handler
  *	tlen		segment data length; must be zero for the fast path
  *	iptos		IP TOS byte; only the ECN bits are examined here
  *	ti_locked	TI_RLOCKED or TI_UNLOCKED (tcbinfo lock state)
  */
 void
 tcp_do_segment_fastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 		       struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
 		       int ti_locked)
 {
 	int thflags;
 	u_long tiwin;
 	char *s;
 	struct in_conninfo *inc;
 	struct tcpopt to;
 
 	thflags = th->th_flags;
 	tp->sackhint.last_sack_ack = 0;
 	inc = &tp->t_inpcb->inp_inc;
 	/*
 	 * If this is either a state-changing packet or current state isn't
 	 * established, we require the tcbinfo lock to be held; the
 	 * assertions below check for the read lock (TI_RLOCKED).  Otherwise,
 	 * we allow the tcbinfo to be either locked or unlocked, as the
 	 * caller may have unnecessarily acquired the lock due to a race.
 	 */
 	if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
 	    tp->t_state != TCPS_ESTABLISHED) {
 		KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
 						  "SYN/FIN/RST/!EST", __func__, ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	} else {
 #ifdef INVARIANTS
 		if (ti_locked == TI_RLOCKED) {
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 		} else {
 			KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
 							   "ti_locked: %d", __func__, ti_locked));
 			INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
 		}
 #endif
 	}
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
 					    __func__));
 	KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT",
 						__func__));
 
 	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 * XXX: This should be done after segment
 	 * validation to ignore broken/spoofed segs.
 	 */
 	tp->t_rcvtime = ticks;
 	if (TCPS_HAVEESTABLISHED(tp->t_state))
 		tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 
 	/*
 	 * Unscale the window into a 32-bit value.
 	 * For the SYN_SENT state the scale is zero.
 	 */
 	tiwin = th->th_win << tp->snd_scale;
 
 	/*
 	 * TCP ECN processing.
 	 */
 	if (tp->t_flags & TF_ECN_PERMIT) {
 		/* Peer signalled CWR: stop echoing ECE. */
 		if (thflags & TH_CWR)
 			tp->t_flags &= ~TF_ECN_SND_ECE;
 		switch (iptos & IPTOS_ECN_MASK) {
 		case IPTOS_ECN_CE:
 			tp->t_flags |= TF_ECN_SND_ECE;
 			TCPSTAT_INC(tcps_ecn_ce);
 			break;
 		case IPTOS_ECN_ECT0:
 			TCPSTAT_INC(tcps_ecn_ect0);
 			break;
 		case IPTOS_ECN_ECT1:
 			TCPSTAT_INC(tcps_ecn_ect1);
 			break;
 		}
 		/* Congestion experienced. */
 		if (thflags & TH_ECE) {
 			cc_cong_signal(tp, th, CC_ECN);
 		}
 	}
 
 	/*
 	 * Parse options on any incoming segment.
 	 */
 	tcp_dooptions(&to, (u_char *)(th + 1),
 		      (th->th_off << 2) - sizeof(struct tcphdr),
 		      (thflags & TH_SYN) ? TO_SYN : 0);
 
 	/*
 	 * If echoed timestamp is later than the current time,
 	 * fall back to non RFC1323 RTT calculation.  Normalize
 	 * timestamp if syncookies were used when this connection
 	 * was established.
 	 */
 	if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
 		to.to_tsecr -= tp->ts_offset;
 		if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
 			to.to_tsecr = 0;
 	}
 	/*
 	 * If timestamps were negotiated during SYN/ACK they should
 	 * appear on every segment during this session and vice versa.
 	 * A mismatch is only logged; the segment is still processed.
 	 */
 	if ((tp->t_flags & TF_RCVD_TSTMP) && !(to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 	if (!(tp->t_flags & TF_RCVD_TSTMP) && (to.to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 		}
 	}
 
 	/*
 	 * Process options only when we get SYN/ACK back. The SYN case
 	 * for incoming connections is handled in tcp_syncache.
 	 * According to RFC1323 the window field in a SYN (i.e., a <SYN>
 	 * or <SYN,ACK>) segment itself is never scaled.
 	 * XXX this is traditional behavior, may need to be cleaned up.
 	 */
 	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
 		if ((to.to_flags & TOF_SCALE) &&
 		    (tp->t_flags & TF_REQ_SCALE)) {
 			tp->t_flags |= TF_RCVD_SCALE;
 			tp->snd_scale = to.to_wscale;
 		}
 		/*
 		 * Initial send window.  It will be updated with
 		 * the next incoming segment to the scaled value.
 		 */
 		tp->snd_wnd = th->th_win;
 		if (to.to_flags & TOF_TS) {
 			tp->t_flags |= TF_RCVD_TSTMP;
 			tp->ts_recent = to.to_tsval;
 			tp->ts_recent_age = tcp_ts_getticks();
 		}
 		if (to.to_flags & TOF_MSS)
 			tcp_mss(tp, to.to_mss);
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    (to.to_flags & TOF_SACKPERM) == 0)
 			tp->t_flags &= ~TF_SACK_PERMIT;
 	}
 	/*
 	 * Header prediction: check for the two common cases
 	 * of a uni-directional data xfer.  If the packet has
 	 * no control flags, is in-sequence, the window didn't
 	 * change and we're not retransmitting, it's a
 	 * candidate.  If the length is zero and the ack moved
 	 * forward, we're the sender side of the xfer.  Just
 	 * free the data acked & wake any higher level process
 	 * that was blocked waiting for space.  If the length
 	 * is non-zero and the ack didn't move, we're the
 	 * receiver side.  If we're getting packets in-order
 	 * (the reassembly queue is empty), add the data to
 	 * the socket buffer and note that we need a delayed ack.
 	 * Make sure that the hidden state-flags are also off.
 	 * Since we check for TCPS_ESTABLISHED first, it can only
 	 * be TF_NEEDSYN.
 	 *
 	 * Only the zero-length (sender side) case is tried here; the
 	 * remaining eligibility tests are made inside tcp_fastack(),
 	 * which returns 0 when the segment must take the slow path.
 	 */
 	if (__predict_true(tp->t_state == TCPS_ESTABLISHED) &&
 	    __predict_true(((to.to_flags & TOF_SACK) == 0)) &&
 	    __predict_true(tlen == 0) &&
 	    __predict_true((thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK) &&
 	    __predict_true(LIST_EMPTY(&tp->t_segq)) &&
 	    __predict_true(th->th_seq == tp->rcv_nxt)) {
 		    if (tcp_fastack(m, th, so, tp, &to, drop_hdrlen, tlen, 
 				    ti_locked, tiwin)) {
 			    return;
 		    }
 	} 
 	/* Not handled by the fast path: full processing. */
 	tcp_do_slowpath(m, th, so, tp, &to, drop_hdrlen, tlen,
 			ti_locked, tiwin, thflags);
 }
 
 /*
  * Function block registered under the name "fastslow": output goes
  * through tcp_output, input through tcp_do_segment_fastslow and socket
  * options through tcp_default_ctloutput.  The initializer is
  * positional, so the order must match struct tcp_function_block.
  * NOTE(review): the NULL entries are presumably optional hooks left
  * unset and the trailing zeros presumably include tfb_refcnt --
  * confirm against the structure definition.
  */
 struct tcp_function_block __tcp_fastslow = {
 	"fastslow",
 	tcp_output,
 	tcp_do_segment_fastslow,
 	tcp_default_ctloutput,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	0,
 	0
 
 };
 
 /*
  * Function block registered under the name "fastack": output goes
  * through tcp_output, input through tcp_do_segment_fastack and socket
  * options through tcp_default_ctloutput.  The initializer is
  * positional, so the order must match struct tcp_function_block.
  * NOTE(review): the NULL entries are presumably optional hooks left
  * unset and the trailing zeros presumably include tfb_refcnt --
  * confirm against the structure definition.
  */
 struct tcp_function_block __tcp_fastack = {
 	"fastack",
 	tcp_output,
 	tcp_do_segment_fastack,
 	tcp_default_ctloutput,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	0,
 	0
 };
 
 /*
  * Module event handler for the two fast-path function blocks.
  *
  *	MOD_LOAD	register "fastack", then "fastslow"; if the second
  *			registration fails the first one is withdrawn again.
  *	MOD_QUIESCE	refuse with EBUSY while either block is referenced.
  *	MOD_UNLOAD	deregister "fastack", then "fastslow"; EBUSY from
  *			either call is returned, any other result is
  *			reported as success.
  *
  * Any other event type yields EOPNOTSUPP.  The mod and data arguments
  * are not used.
  */
 static int
 tcp_addfastpaths(module_t mod, int type, void *data)
 {
 	int error;
 
 	if (type == MOD_LOAD) {
 		error = register_tcp_functions(&__tcp_fastack, M_WAITOK);
 		if (error != 0) {
 			printf("Failed to register fastack module -- err:%d\n", error);
 			return (error);
 		}
 		error = register_tcp_functions(&__tcp_fastslow, M_WAITOK);
 		if (error != 0) {
 			printf("Failed to register fastslow module -- err:%d\n", error);
 			/* Undo the registration that did succeed. */
 			deregister_tcp_functions(&__tcp_fastack);
 			return (error);
 		}
 		return (0);
 	}
 
 	if (type == MOD_QUIESCE) {
 		if (__tcp_fastslow.tfb_refcnt != 0 ||
 		    __tcp_fastack.tfb_refcnt != 0)
 			return (EBUSY);
 		return (0);
 	}
 
 	if (type == MOD_UNLOAD) {
 		/*
 		 * NOTE(review): if "fastslow" reports EBUSY here, "fastack"
 		 * has already been deregistered by the call before it.
 		 */
 		if (deregister_tcp_functions(&__tcp_fastack) == EBUSY)
 			return (EBUSY);
 		if (deregister_tcp_functions(&__tcp_fastslow) == EBUSY)
 			return (EBUSY);
 		return (0);
 	}
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module description: events for the module named "tcp_fastpaths" are
  * delivered to tcp_addfastpaths(); no private data is attached.
  */
 static moduledata_t new_tcp_fastpaths = {
 	.name = "tcp_fastpaths",
 	.evhand = tcp_addfastpaths,
 	.priv = 0
 };
 
 /*
  * Declare the module under the identifier kern_tcpfastpaths (version 1),
  * using the SI_SUB_PSEUDO subsystem and SI_ORDER_ANY ordering.  Note that
  * this identifier differs from the .name string above.
  */
 MODULE_VERSION(kern_tcpfastpaths, 1);
 DECLARE_MODULE(kern_tcpfastpaths, new_tcp_fastpaths, SI_SUB_PSEUDO, SI_ORDER_ANY);
Index: projects/release-pkg/sys/netinet/tcp_subr.c
===================================================================
--- projects/release-pkg/sys/netinet/tcp_subr.c	(revision 293335)
+++ projects/release-pkg/sys/netinet/tcp_subr.c	(revision 293336)
@@ -1,2867 +1,2920 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
 #include <sys/hhook.h>
 #include <sys/kernel.h>
 #include <sys/khelp.h>
 #include <sys/sysctl.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/refcount.h>
 #include <sys/mbuf.h>
 #ifdef INET6
 #include <sys/domain.h>
 #endif
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/random.h>
 
 #include <vm/uma.h>
 
 #include <net/route.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #endif
 
 #ifdef TCP_RFC7413
 #include <netinet/tcp_fastopen.h>
 #endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #include <netinet/tcpip.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 #ifdef INET6
 #include <netinet6/ip6protosw.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 
 #ifdef IPSEC
 #include <netipsec/ipsec.h>
 #include <netipsec/xform.h>
 #ifdef INET6
 #include <netipsec/ipsec6.h>
 #endif
 #include <netipsec/key.h>
 #include <sys/syslog.h>
 #endif /*IPSEC*/
 
 #include <machine/in_cksum.h>
 #include <sys/md5.h>
 
 #include <security/mac/mac_framework.h>
 
 VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
 #ifdef INET6
 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
 #endif
 
 struct rwlock tcp_function_lock;
 
 static int
 sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
 {
 	int error, new;
 
 	new = V_tcp_mssdflt;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error == 0 && req->newptr) {
 		if (new < TCP_MINMSS)
 			error = EINVAL;
 		else
 			V_tcp_mssdflt = new;
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0,
     &sysctl_net_inet_tcp_mss_check, "I",
     "Default TCP Maximum Segment Size");
 
 #ifdef INET6
 static int
 sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS)
 {
 	int error, new;
 
 	new = V_tcp_v6mssdflt;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error == 0 && req->newptr) {
 		if (new < TCP_MINMSS)
 			error = EINVAL;
 		else
 			V_tcp_v6mssdflt = new;
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0,
     &sysctl_net_inet_tcp_mss_v6_check, "I",
    "Default TCP Maximum Segment Size for IPv6");
 #endif /* INET6 */
 
 /*
  * Minimum MSS we accept and use. This prevents DoS attacks where
  * we are forced to a ridiculously low MSS like 20 and send hundreds
  * of packets instead of one. The effect scales with the available
  * bandwidth and quickly saturates the CPU and network interface
  * with packet generation and sending. Set to zero to disable MINMSS
  * checking. This setting prevents us from sending too small packets.
  */
 VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW,
      &VNET_NAME(tcp_minmss), 0,
     "Minimum TCP Maximum Segment Size");
 
 VNET_DEFINE(int, tcp_do_rfc1323) = 1;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc1323), 0,
     "Enable rfc1323 (high performance TCP) extensions");
 
 static int	tcp_log_debug = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
     &tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
 
 static int	tcp_tcbhashsize;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
 
 static int	do_tcpdrain = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
     "Enable tcp_drain routine for extra help when low on mbufs");
 
 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD,
     &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs");
 
 static VNET_DEFINE(int, icmp_may_rst) = 1;
 #define	V_icmp_may_rst			VNET(icmp_may_rst)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(icmp_may_rst), 0,
     "Certain ICMP unreachable messages may abort connections in SYN_SENT");
 
 static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0;
 #define	V_tcp_isn_reseed_interval	VNET(tcp_isn_reseed_interval)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_isn_reseed_interval), 0,
     "Seconds between reseeding of ISN secret");
 
 static int	tcp_soreceive_stream;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
     &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets");
 
 #ifdef TCP_SIGNATURE
 static int	tcp_sig_checksigs = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, signature_verify_input, CTLFLAG_RW,
     &tcp_sig_checksigs, 0, "Verify RFC2385 digests on inbound traffic");
 #endif
 
 VNET_DEFINE(uma_zone_t, sack_hole_zone);
 #define	V_sack_hole_zone		VNET(sack_hole_zone)
 
 VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
 
 static struct inpcb *tcp_notify(struct inpcb *, int);
 static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
 static void tcp_mtudisc(struct inpcb *, int);
 static char *	tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
 		    void *ip4hdr, const void *ip6hdr);
 static void	tcp_timer_discard(struct tcpcb *, uint32_t);
 
 
 /*
  * The built-in "default" function block.  tcp_init() registers it,
  * tcp_func_set_ptr initially points at it, and deregister_tcp_functions()
  * refuses to remove a block with this name.
  *
  * The initializer is positional.  NOTE(review): the seven NULLs are
  * presumably the optional init/fini and timer hooks and the two trailing
  * zeros the reference count and flags -- confirm against the definition
  * of struct tcp_function_block.
  */
 static struct tcp_function_block tcp_def_funcblk = {
 	"default",			/* stack name */
 	tcp_output,
 	tcp_do_segment,
 	tcp_default_ctloutput,
 	NULL,
 	NULL,	
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	0,
 	0
 };
 
 struct tcp_funchead t_functions;
 static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk;
 
 /*
  * Look up a registered function block by the name stored in
  * fs->function_set_name.  Returns the block, or NULL if no entry on
  * t_functions carries that name.  The "_locked" suffix reflects that all
  * callers in this file hold tcp_function_lock around the call.
  */
 static struct tcp_function_block *
 find_tcp_functions_locked(struct tcp_function_set *fs)
 {
 	struct tcp_function *ent;
 
 	TAILQ_FOREACH(ent, &t_functions, tf_next) {
 		if (strcmp(ent->tf_fb->tfb_tcp_block_name,
 		    fs->function_set_name) == 0)
 			return (ent->tf_fb);
 	}
 	return (NULL);
 }
 
 /*
  * Check whether `blk' is on the t_functions list (compared by pointer).
  * Returns `blk' when found, NULL otherwise.  If `s' is non-NULL and the
  * block is found, *s receives the list entry that refers to it; *s is
  * left untouched on a miss.  Callers in this file hold tcp_function_lock.
  */
 static struct tcp_function_block *
 find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s)
 {
 	struct tcp_function *ent;
 
 	TAILQ_FOREACH(ent, &t_functions, tf_next) {
 		if (ent->tf_fb != blk)
 			continue;
 		if (s != NULL)
 			*s = ent;
 		return (blk);
 	}
 	return (NULL);
 }
 
 struct tcp_function_block *
 find_and_ref_tcp_functions(struct tcp_function_set *fs)
 {
 	struct tcp_function_block *blk;
 	
 	rw_rlock(&tcp_function_lock);	
 	blk = find_tcp_functions_locked(fs);
 	if (blk)
 		refcount_acquire(&blk->tfb_refcnt); 
 	rw_runlock(&tcp_function_lock);
 	return(blk);
 }
 
 /*
  * Confirm that `blk' is still registered and, if so, take a reference
  * on it.  Returns `blk' on success and NULL if it is not on the list.
  * The check and the reference are done under the read lock.
  */
 struct tcp_function_block *
 find_and_ref_tcp_fb(struct tcp_function_block *blk)
 {
 	struct tcp_function_block *found;
 
 	rw_rlock(&tcp_function_lock);
 	found = find_tcp_fb_locked(blk, NULL);
 	if (found != NULL) {
 		refcount_acquire(&found->tfb_refcnt);
 	}
 	rw_runlock(&tcp_function_lock);
 
 	return (found);
 }
 
 
 static int
 sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
 {
 	int error=ENOENT;
 	struct tcp_function_set fs;
 	struct tcp_function_block *blk;
 
 	memset(&fs, 0, sizeof(fs));
 	rw_rlock(&tcp_function_lock);
 	blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL);
 	if (blk) {
 		/* Found him */
 		strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
 		fs.pcbcnt = blk->tfb_refcnt;
 	}
 	rw_runlock(&tcp_function_lock);	
 	error = sysctl_handle_string(oidp, fs.function_set_name,
 				     sizeof(fs.function_set_name), req);
 
 	/* Check for error or no change */
 	if (error != 0 || req->newptr == NULL)
 		return(error);
 
 	rw_wlock(&tcp_function_lock);
 	blk = find_tcp_functions_locked(&fs);
 	if ((blk == NULL) ||
 	    (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) { 
 		error = ENOENT; 
 		goto done;
 	}
 	tcp_func_set_ptr = blk;
 done:
 	rw_wunlock(&tcp_function_lock);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default,
 	    CTLTYPE_STRING | CTLFLAG_RW,
 	    NULL, 0, sysctl_net_inet_default_tcp_functions, "A",
 	    "Set/get the default TCP functions");
 
 static int
 sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS)
 {
 	int error, cnt, linesz;
 	struct tcp_function *f;
 	char *buffer, *cp;
 	size_t bufsz, outsz;
 
 	cnt = 0;
 	rw_rlock(&tcp_function_lock);
 	TAILQ_FOREACH(f, &t_functions, tf_next) {
 		cnt++;
 	}
 	rw_runlock(&tcp_function_lock);
 
 	bufsz = (cnt+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1;
 	buffer = malloc(bufsz, M_TEMP, M_WAITOK);
 
 	error = 0;
 	cp = buffer;
 
 	linesz = snprintf(cp, bufsz, "\n%-32s%c %s\n", "Stack", 'D', "PCB count");
 	cp += linesz;
 	bufsz -= linesz;
 	outsz = linesz;
 
 	rw_rlock(&tcp_function_lock);	
 	TAILQ_FOREACH(f, &t_functions, tf_next) {
 		linesz = snprintf(cp, bufsz, "%-32s%c %u\n",
 		    f->tf_fb->tfb_tcp_block_name,
 		    (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ',
 		    f->tf_fb->tfb_refcnt);
 		if (linesz >= bufsz) {
 			error = EOVERFLOW;
 			break;
 		}
 		cp += linesz;
 		bufsz -= linesz;
 		outsz += linesz;
 	}
 	rw_runlock(&tcp_function_lock);
 	if (error == 0)
 		error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
 	free(buffer, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
 	    CTLTYPE_STRING|CTLFLAG_RD,
 	    NULL, 0, sysctl_net_inet_list_available, "A",
 	    "list available TCP Function sets");
 
 /*
  * Target size of TCP PCB hash tables. Must be a power of two.
  *
  * Note that this can be overridden by the kernel environment
  * variable net.inet.tcp.tcbhashsize
  */
 #ifndef TCBHASHSIZE
 #define TCBHASHSIZE	0
 #endif
 
 /*
  * XXX
  * Callouts should be moved into struct tcp directly.  They are currently
  * separate because the tcpcb structure is exported to userland for sysctl
  * parsing purposes, which do not know about callouts.
  */
 /*
  * One allocation unit of V_tcpcb_zone: the tcpcb itself plus the
  * per-connection state that tcp_newtcpcb() wires into it by pointer.
  */
 struct tcpcb_mem {
 	struct	tcpcb		tcb;	/* the control block proper */
 	struct	tcp_timer	tt;	/* becomes tp->t_timers */
 	struct	cc_var		ccv;	/* becomes tp->ccv */
 	struct	osd		osd;	/* becomes tp->osd */
 };
 
 static VNET_DEFINE(uma_zone_t, tcpcb_zone);
 #define	V_tcpcb_zone			VNET(tcpcb_zone)
 
 MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
 MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory");
 
 static struct mtx isn_mtx;
 
 #define	ISN_LOCK_INIT()	mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
 #define	ISN_LOCK()	mtx_lock(&isn_mtx)
 #define	ISN_UNLOCK()	mtx_unlock(&isn_mtx)
 
 /*
  * TCP initialization.
  */
 /*
  * Event handler registered by tcp_init() for maxsockets_change:
  * re-applies the current maxsockets as the limit of the inpcb and tcpcb
  * zones and lets the time-wait code adjust via tcp_tw_zone_change().
  * `tag' is unused.
  */
 static void
 tcp_zone_change(void *tag)
 {
 
 	uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets);
 	uma_zone_set_max(V_tcpcb_zone, maxsockets);
 	tcp_tw_zone_change();
 }
 
 /*
  * Item-init callback passed to in_pcbinfo_init() by tcp_init():
  * initializes the lock of the inpcb at `mem'.  `size' and `flags' are
  * unused; always returns 0.
  */
 static int
 tcp_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *inp = mem;
 
 	INP_LOCK_INIT(inp, "inp", "tcpinp");
 	return (0);
 }
 
 /*
  * Round a requested TCB hash size up to a power of two that fits in an int.
  * Used to size the tcp_inpcb hash buckets.
  *
  * Returns the smallest power of two strictly greater than `size' (an exact
  * power of two is therefore doubled, matching the historical
  * 1 << fls(size) result), capped at 2^30, the largest power of two an int
  * can represent.  A zero or negative `size' yields 1.
  *
  * The value is built by bounded doubling instead of 1 << fls(size): that
  * expression shifts into or past the sign bit when size >= 2^30 or
  * size < 0, which is undefined behaviour, and `size' may originate from a
  * loader tunable.
  */
 static int
 maketcp_hashsize(int size)
 {
 	const int hashmax = 1 << 30;
 	int hashsize;
 
 	hashsize = 1;
 	/* Stop at hashmax so the shift can never reach the sign bit. */
 	while (hashsize <= size && hashsize < hashmax)
 		hashsize <<= 1;
 	return (hashsize);
 }
 
 int
 register_tcp_functions(struct tcp_function_block *blk, int wait)
 {
 	struct tcp_function_block *lblk;
 	struct tcp_function *n;
 	struct tcp_function_set fs;
 
 	if ((blk->tfb_tcp_output == NULL) ||
 	    (blk->tfb_tcp_do_segment == NULL) ||
 	    (blk->tfb_tcp_ctloutput == NULL) ||
 	    (strlen(blk->tfb_tcp_block_name) == 0)) {
 		/* 
 		 * These functions are required and you
 		 * need a name.
 		 */
 		return (EINVAL);
 	}
 	if (blk->tfb_tcp_timer_stop_all ||
 	    blk->tfb_tcp_timers_left ||
 	    blk->tfb_tcp_timer_activate ||
 	    blk->tfb_tcp_timer_active ||
 	    blk->tfb_tcp_timer_stop) {
 		/*
 		 * If you define one timer function you 
 		 * must have them all.
 		 */
 		if ((blk->tfb_tcp_timer_stop_all == NULL) ||
 		    (blk->tfb_tcp_timers_left  == NULL) ||
 		    (blk->tfb_tcp_timer_activate == NULL) ||
 		    (blk->tfb_tcp_timer_active == NULL) ||
 		    (blk->tfb_tcp_timer_stop == NULL)) {
 			return (EINVAL);			
 		}
 	}	
 	n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait);
 	if (n == NULL) {
 		return (ENOMEM);
 	}
 	n->tf_fb = blk;
 	strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
 	rw_wlock(&tcp_function_lock);
 	lblk = find_tcp_functions_locked(&fs);
 	if (lblk) {
 		/* Duplicate name space not allowed */
 		rw_wunlock(&tcp_function_lock);
 		free(n, M_TCPFUNCTIONS);
 		return (EALREADY);
 	}
 	refcount_init(&blk->tfb_refcnt, 0);
 	blk->tfb_flags = 0;
 	TAILQ_INSERT_TAIL(&t_functions, n, tf_next);
 	rw_wunlock(&tcp_function_lock);
 	return(0);
 }	
 
 int
 deregister_tcp_functions(struct tcp_function_block *blk)
 {
 	struct tcp_function_block *lblk;
 	struct tcp_function *f;
 	int error=ENOENT;
 	
 	if (strcmp(blk->tfb_tcp_block_name, "default") == 0) {
 		/* You can't un-register the default */
 		return (EPERM);
 	}
 	rw_wlock(&tcp_function_lock);
 	if (blk == tcp_func_set_ptr) {
 		/* You can't free the current default */
 		rw_wunlock(&tcp_function_lock);
 		return (EBUSY);
 	}
 	if (blk->tfb_refcnt) {
 		/* Still tcb attached, mark it. */
 		blk->tfb_flags |= TCP_FUNC_BEING_REMOVED;
 		rw_wunlock(&tcp_function_lock);		
 		return (EBUSY);
 	}
 	lblk = find_tcp_fb_locked(blk, &f);
 	if (lblk) {
 		/* Found */
 		TAILQ_REMOVE(&t_functions, f, tf_next);
 		f->tf_fb = NULL;
 		free(f, M_TCPFUNCTIONS);
 		error = 0;
 	}
 	rw_wunlock(&tcp_function_lock);
 	return (error);
 }
 
 /*
  * Initialize TCP.  The first part (helper hooks, PCB hash and zones,
  * time-wait, syncache, host cache, SACK zone) runs for every vnet; the
  * remainder, after the IS_DEFAULT_VNET() check, sets up state that is
  * global and therefore only initialized once.
  */
 void
 tcp_init(void)
 {
 	const char *tcbhash_tuneable;
 	int hashsize;
 
 	tcbhash_tuneable = "net.inet.tcp.tcbhashsize";
 
 	/* A failed hook registration is only reported, not treated as fatal. */
 	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
 	    &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register helper hook\n", __func__);
 	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
 	    &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register helper hook\n", __func__);
 	/* Compile-time default, optionally overridden by the tunable. */
 	hashsize = TCBHASHSIZE;
 	TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
 	if (hashsize == 0) {
 		/*
 		 * Auto tune the hash size based on maxsockets.
 		 * A perfect hash would have a 1:1 mapping
 		 * (hashsize = maxsockets) however it's been
 		 * suggested that O(2) average is better.
 		 */
 		hashsize = maketcp_hashsize(maxsockets / 4);
 		/*
 		 * Our historical default is 512,
 		 * do not autotune lower than this.
 		 */
 		if (hashsize < 512)
 			hashsize = 512;
 		if (bootverbose && IS_DEFAULT_VNET(curvnet))
 			printf("%s: %s auto tuned to %d\n", __func__,
 			    tcbhash_tuneable, hashsize);
 	}
 	/*
 	 * We require a hashsize to be a power of two.
 	 * Previously if it was not a power of two we would just reset it
 	 * back to 512, which could be a nasty surprise if you did not notice
 	 * the error message.
 	 * Instead we round it to a power of two with maketcp_hashsize().
 	 * NOTE(review): this comment used to say the value is clipped to the
 	 * closest power of two *lower* than the one specified, and the
 	 * message below still says "clipped"; maketcp_hashsize() actually
 	 * returns the next power of two *above* its argument, stepping down
 	 * only when that would overflow.  Confirm which is intended.
 	 */
 	if (!powerof2(hashsize)) {
 		int oldhashsize = hashsize;
 
 		hashsize = maketcp_hashsize(hashsize);
 		/* prevent absurdly low value */
 		if (hashsize < 16)
 			hashsize = 16;
 		printf("%s: WARNING: TCB hash size not a power of 2, "
 		    "clipped from %d to %d.\n", __func__, oldhashsize,
 		    hashsize);
 	}
 	in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
 	    "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
 	    IPI_HASHFIELDS_4TUPLE);
 
 	/*
 	 * These have to be type stable for the benefit of the timers.
 	 */
 	V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(V_tcpcb_zone, maxsockets);
 	uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached");
 
 	tcp_tw_init();
 	syncache_init();
 	tcp_hc_init();
 
 	TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack);
 	V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	tcp_reass_global_init();
 
 	/* XXX virtualize those below? */
 	tcp_delacktime = TCPTV_DELACK;
 	tcp_keepinit = TCPTV_KEEP_INIT;
 	tcp_keepidle = TCPTV_KEEP_IDLE;
 	tcp_keepintvl = TCPTV_KEEPINTVL;
 	tcp_maxpersistidle = TCPTV_KEEP_IDLE;
 	tcp_msl = TCPTV_MSL;
 	tcp_rexmit_min = TCPTV_MIN;
 	if (tcp_rexmit_min < 1)
 		tcp_rexmit_min = 1;
 	tcp_rexmit_slop = TCPTV_CPU_VAR;
 	tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
 	/* Export the final hash size through the read-only sysctl. */
 	tcp_tcbhashsize = hashsize;
 	/* Setup the tcp function block list */
 	TAILQ_INIT(&t_functions);
 	rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0);
 	/* NOTE(review): the registration result is not checked here. */
 	register_tcp_functions(&tcp_def_funcblk, M_WAITOK);
 
 	if (tcp_soreceive_stream) {
 #ifdef INET
 		tcp_usrreqs.pru_soreceive = soreceive_stream;
 #endif
 #ifdef INET6
 		tcp6_usrreqs.pru_soreceive = soreceive_stream;
 #endif /* INET6 */
 	}
 
 	/*
 	 * Make sure max_protohdr covers the largest TCP/IP header and that
 	 * link header plus that header still fit in a packet-header mbuf.
 	 */
 #ifdef INET6
 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
 #else /* INET6 */
 #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
 #endif /* INET6 */
 	if (max_protohdr < TCP_MINPROTOHDR)
 		max_protohdr = TCP_MINPROTOHDR;
 	if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
 		panic("tcp_init");
 #undef TCP_MINPROTOHDR
 
 	ISN_LOCK_INIT();
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
 	EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
 		EVENTHANDLER_PRI_ANY);
 #ifdef TCPPCAP
 	tcp_pcap_init();
 #endif
 
 #ifdef TCP_RFC7413
 	tcp_fastopen_init();
 #endif
 }
 
 #ifdef VIMAGE
 void
 tcp_destroy(void)
 {
 	int error;
 
 #ifdef TCP_RFC7413
 	tcp_fastopen_destroy();
 #endif
 	tcp_hc_destroy();
 	syncache_destroy();
 	tcp_tw_destroy();
 	in_pcbinfo_destroy(&V_tcbinfo);
 	uma_zdestroy(V_sack_hole_zone);
 	uma_zdestroy(V_tcpcb_zone);
 
 	error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_IN]);
 	if (error != 0) {
 		printf("%s: WARNING: unable to deregister helper hook "
 		    "type=%d, id=%d: error %d returned\n", __func__,
 		    HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, error);
 	}
 	error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_OUT]);
 	if (error != 0) {
 		printf("%s: WARNING: unable to deregister helper hook "
 		    "type=%d, id=%d: error %d returned\n", __func__,
 		    HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, error);
 	}
 }
 #endif
 
 /*
  * shutdown_pre_sync event handler registered by tcp_init().
  * Currently a no-op; `xtp' is unused.
  */
 void
 tcp_fini(void *xtp)
 {
 
 }
 
 /*
  * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb.
  * tcp_template used to store this data in mbufs, but we now recopy it out
  * of the tcpcb each time to conserve mbufs.
  */
 void
 tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
 {
 	struct tcphdr *th = (struct tcphdr *)tcp_ptr;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * ip_ptr is interpreted as an IPv6 header when the inpcb has
 	 * INP_IPV6 set and as an IPv4 header otherwise.
 	 */
 #ifdef INET6
 	if ((inp->inp_vflag & INP_IPV6) != 0) {
 		struct ip6_hdr *ip6;
 
 		ip6 = (struct ip6_hdr *)ip_ptr;
 		/* Replace only the flow-info and version bits. */
 		ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
 			(inp->inp_flow & IPV6_FLOWINFO_MASK);
 		ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
 			(IPV6_VERSION & IPV6_VERSION_MASK);
 		ip6->ip6_nxt = IPPROTO_TCP;
 		/* Payload length of a bare TCP header. */
 		ip6->ip6_plen = htons(sizeof(struct tcphdr));
 		ip6->ip6_src = inp->in6p_laddr;
 		ip6->ip6_dst = inp->in6p_faddr;
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		struct ip *ip;
 
 		ip = (struct ip *)ip_ptr;
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = 5;		/* 5 32-bit words: no IP options */
 		ip->ip_tos = inp->inp_ip_tos;
 		ip->ip_len = 0;
 		ip->ip_id = 0;
 		ip->ip_off = 0;
 		ip->ip_ttl = inp->inp_ip_ttl;
 		ip->ip_sum = 0;
 		ip->ip_p = IPPROTO_TCP;
 		ip->ip_src = inp->inp_laddr;
 		ip->ip_dst = inp->inp_faddr;
 	}
 #endif /* INET */
 	/* TCP header: ports from the inpcb, everything else cleared. */
 	th->th_sport = inp->inp_lport;
 	th->th_dport = inp->inp_fport;
 	th->th_seq = 0;
 	th->th_ack = 0;
 	th->th_x2 = 0;
 	th->th_off = 5;		/* 5 32-bit words: no TCP options */
 	th->th_flags = 0;
 	th->th_win = 0;
 	th->th_urp = 0;
 	th->th_sum = 0;		/* in_pseudo() is called later for ipv4 */
 }
 
 /*
  * Create a template to be used to send tcp packets on a connection.
  * Allocates a struct tcptemp with malloc(M_TEMP, M_NOWAIT) and fills in
  * a skeletal tcp/ip header via tcpip_fillheaders().  Returns NULL if the
  * allocation fails.  The only use for this function is in keepalives,
  * which use tcp_respond.
  */
 struct tcptemp *
 tcpip_maketemplate(struct inpcb *inp)
 {
 	struct tcptemp *tmpl;
 
 	tmpl = malloc(sizeof(*tmpl), M_TEMP, M_NOWAIT);
 	if (tmpl != NULL) {
 		tcpip_fillheaders(inp, (void *)&tmpl->tt_ipgen,
 		    (void *)&tmpl->tt_t);
 	}
 	return (tmpl);
 }
 
 /*
  * Send a single message to the TCP at address specified by
  * the given TCP/IP header.  If m == NULL, then we make a copy
  * of the tcpiphdr at th and send directly to the addressed host.
  * This is used to force keep alive messages out using the TCP
  * template for a connection.  If flags are given then we send
  * a message back to the TCP which originated the segment th,
  * and discard the mbuf containing it and any other attached mbufs.
  *
  * In any case the ack and sequence number of the transmitted
  * segment are as specified by the parameters.
  *
  * NOTE: If m != NULL, then th must point to *inside* the mbuf.
  */
 void
 tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
     tcp_seq ack, tcp_seq seq, int flags)
 {
 	int tlen;
 	int win = 0;
 	struct ip *ip;
 	struct tcphdr *nth;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	int isipv6;
 #endif /* INET6 */
 	int ipflags = 0;
 	struct inpcb *inp;
 
 	KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
 
 	/* The IP version field of the supplied header selects v4 or v6. */
 #ifdef INET6
 	isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4);
 	ip6 = ipgen;
 #endif /* INET6 */
 	ip = ipgen;
 
 	if (tp != NULL) {
 		inp = tp->t_inpcb;
 		KASSERT(inp != NULL, ("tcp control block w/o inpcb"));
 		INP_WLOCK_ASSERT(inp);
 	} else
 		inp = NULL;
 
 	/*
 	 * With a connection and no RST, advertise the free receive buffer
 	 * space, limited to what the receive window scale can express.
 	 * Otherwise the window stays 0.
 	 */
 	if (tp != NULL) {
 		if (!(flags & TH_RST)) {
 			win = sbspace(&inp->inp_socket->so_rcv);
 			if (win > (long)TCP_MAXWIN << tp->rcv_scale)
 				win = (long)TCP_MAXWIN << tp->rcv_scale;
 		}
 	}
 	if (m == NULL) {
 		/*
 		 * No mbuf supplied: allocate one and copy the caller's IP
 		 * and TCP headers into it, leaving room for a link header.
 		 */
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL)
 			return;
 		tlen = 0;
 		m->m_data += max_linkhdr;
 #ifdef INET6
 		if (isipv6) {
 			bcopy((caddr_t)ip6, mtod(m, caddr_t),
 			      sizeof(struct ip6_hdr));
 			ip6 = mtod(m, struct ip6_hdr *);
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
 		{
 			bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
 			ip = mtod(m, struct ip *);
 			nth = (struct tcphdr *)(ip + 1);
 		}
 		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
 		/* On this path the caller's flags are replaced by a plain ACK. */
 		flags = TH_ACK;
 	} else {
 		/*
 		 * Reuse the mbuf: drop any chained mbufs and rewrite the
 		 * headers in place.
 		 * XXX MRT We inherit the FIB, which is lucky.
 		 */
 		m_freem(m->m_next);
 		m->m_next = NULL;
 		m->m_data = (caddr_t)ipgen;
 		/* m_len is set later */
 		tlen = 0;
 		/* Swap source and destination so the reply goes to the sender. */
 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
 #ifdef INET6
 		if (isipv6) {
 			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
 		{
 			xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
 			nth = (struct tcphdr *)(ip + 1);
 		}
 		if (th != nth) {
 			/*
 			 * this is usually a case when an extension header
 			 * exists between the IPv6 header and the
 			 * TCP header.
 			 */
 			nth->th_sport = th->th_sport;
 			nth->th_dport = th->th_dport;
 		}
 		xchg(nth->th_dport, nth->th_sport, uint16_t);
 #undef xchg
 	}
 	/* From here on tlen counts only the IP and TCP headers: no payload. */
 #ifdef INET6
 	if (isipv6) {
 		ip6->ip6_flow = 0;
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_nxt = IPPROTO_TCP;
 		tlen += sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
 		ip6->ip6_plen = htons(tlen - sizeof(*ip6));
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		tlen += sizeof (struct tcpiphdr);
 		ip->ip_len = htons(tlen);
 		ip->ip_ttl = V_ip_defttl;
 		if (V_path_mtu_discovery)
 			ip->ip_off |= htons(IP_DF);
 	}
 #endif
 	m->m_len = tlen;
 	m->m_pkthdr.len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 	if (inp != NULL) {
 		/*
 		 * Packet is associated with a socket, so allow the
 		 * label of the response to reflect the socket label.
 		 */
 		INP_WLOCK_ASSERT(inp);
 		mac_inpcb_create_mbuf(inp, m);
 	} else {
 		/*
 		 * Packet is not associated with a socket, so possibly
 		 * update the label in place.
 		 */
 		mac_netinet_tcp_reply(m);
 	}
 #endif
 	/* Fill in the TCP header of the response. */
 	nth->th_seq = htonl(seq);
 	nth->th_ack = htonl(ack);
 	nth->th_x2 = 0;
 	nth->th_off = sizeof (struct tcphdr) >> 2;
 	nth->th_flags = flags;
 	if (tp != NULL)
 		nth->th_win = htons((u_short) (win >> tp->rcv_scale));
 	else
 		nth->th_win = htons((u_short)win);
 	nth->th_urp = 0;
 
 	/*
 	 * Store only the pseudo-header sum in th_sum and record where the
 	 * checksum field lives; csum_flags requests that the TCP checksum
 	 * be completed later.
 	 */
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 #ifdef INET6
 	if (isipv6) {
 		m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 		nth->th_sum = in6_cksum_pseudo(ip6,
 		    tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
 		ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
 		    NULL, NULL);
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
 	}
 #endif /* INET */
 #ifdef TCPDEBUG
 	/* inp is only dereferenced when tp, and therefore inp, is non-NULL. */
 	if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
 #endif
 	/*
 	 * NOTE(review): this fires the debug__input probe although the
 	 * segment is being transmitted -- check whether an output probe
 	 * was intended.
 	 */
 	TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 	if (flags & TH_RST)
 		TCP_PROBE5(accept__refused, NULL, NULL, mtod(m, const char *),
 		    tp, nth);
 
 	TCP_PROBE5(send, NULL, tp, mtod(m, const char *), tp, nth);
 	/* Hand the segment to IP; the output result is deliberately ignored. */
 #ifdef INET6
 	if (isipv6)
 		(void) ip6_output(m, NULL, NULL, ipflags, NULL, NULL, inp);
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 		(void) ip_output(m, NULL, NULL, ipflags, NULL, inp);
 #endif
 }
 
 /*
  * Create a new TCP control block, making an
  * empty reassembly queue and hooking it to the argument
  * protocol control block.  The `inp' parameter must have
  * come from the zone allocator set up in tcp_init().
  */
 struct tcpcb *
 tcp_newtcpcb(struct inpcb *inp)
 {
 	struct tcpcb_mem *tm;
 	struct tcpcb *tp;
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 
 	/* Zeroed allocation; several fields below rely on starting at 0. */
 	tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO);
 	if (tm == NULL)
 		return (NULL);
 	tp = &tm->tcb;
 
 	/* Initialise cc_var struct for this tcpcb. */
 	tp->ccv = &tm->ccv;
 	tp->ccv->type = IPPROTO_TCP;
 	tp->ccv->ccvc.tcp = tp;
 	/*
 	 * Attach the current default function block and take a reference
 	 * on it while holding the function lock.
 	 */
 	rw_rlock(&tcp_function_lock);
 	tp->t_fb = tcp_func_set_ptr;
 	refcount_acquire(&tp->t_fb->tfb_refcnt);
 	rw_runlock(&tcp_function_lock);
 	/* Optional per-connection init hook of the function block. */
 	if (tp->t_fb->tfb_tcp_fb_init) {
 		(*tp->t_fb->tfb_tcp_fb_init)(tp);
 	}
 	/*
 	 * Use the current system default CC algorithm.
 	 */
 	CC_LIST_RLOCK();
 	KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!"));
 	CC_ALGO(tp) = CC_DEFAULT();
 	CC_LIST_RUNLOCK();
 
 	/*
 	 * On failure from here on, undo the function block setup: run its
 	 * fini hook, drop the reference and free the allocation.
 	 */
 	if (CC_ALGO(tp)->cb_init != NULL)
 		if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
 			if (tp->t_fb->tfb_tcp_fb_fini)
 				(*tp->t_fb->tfb_tcp_fb_fini)(tp);
 			refcount_release(&tp->t_fb->tfb_refcnt);
 			uma_zfree(V_tcpcb_zone, tm);
 			return (NULL);
 		}
 
 	tp->osd = &tm->osd;
 	if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		uma_zfree(V_tcpcb_zone, tm);
 		return (NULL);
 	}
 
 #ifdef VIMAGE
 	tp->t_vnet = inp->inp_vnet;
 #endif
 	tp->t_timers = &tm->tt;
 	/*	LIST_INIT(&tp->t_segq); */	/* XXX covered by M_ZERO */
 	/* Start from the configured default MSS for the address family. */
-	tp->t_maxseg = tp->t_maxopd =
+	tp->t_maxseg =
 #ifdef INET6
 		isipv6 ? V_tcp_v6mssdflt :
 #endif /* INET6 */
 		V_tcp_mssdflt;
 
 	/* Set up our timeouts. */
 	callout_init(&tp->t_timers->tt_rexmt, 1);
 	callout_init(&tp->t_timers->tt_persist, 1);
 	callout_init(&tp->t_timers->tt_keep, 1);
 	callout_init(&tp->t_timers->tt_2msl, 1);
 	callout_init(&tp->t_timers->tt_delack, 1);
 
 	/* t_flags is still 0 from M_ZERO when rfc1323 is disabled. */
 	if (V_tcp_do_rfc1323)
 		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
 	if (V_tcp_do_sack)
 		tp->t_flags |= TF_SACK_PERMIT;
 	TAILQ_INIT(&tp->snd_holes);
 	/*
 	 * The tcpcb will hold a reference on its inpcb until tcp_discardcb()
 	 * is called.
 	 */
 	in_pcbref(inp);	/* Reference for tcpcb */
 	tp->t_inpcb = inp;
 
 	/*
 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
 	 * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
 	 * reasonable initial retransmit time.
 	 */
 	tp->t_srtt = TCPTV_SRTTBASE;
 	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
 	tp->t_rttmin = tcp_rexmit_min;
 	tp->t_rxtcur = TCPTV_RTOBASE;
 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->t_rcvtime = ticks;
 	/*
 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
 	 * because the socket may be bound to an IPv6 wildcard address,
 	 * which may match an IPv4-mapped IPv6 address.
 	 */
 	inp->inp_ip_ttl = V_ip_defttl;
 	/* Publish the tcpcb through the inpcb. */
 	inp->inp_ppcb = tp;
 #ifdef TCPPCAP
 	/*
 	 * Init the TCP PCAP queues.
 	 */
 	tcp_pcap_tcpcb_init(tp);
 #endif
 	return (tp);		/* XXX */
 }
 
 /*
  * Switch the congestion control algorithm back to NewReno for any active
  * control blocks using an algorithm which is about to go away.
  * This ensures the CC framework can allow the unload to proceed without leaving
  * any dangling pointers which would trigger a panic.
  * Returning non-zero would inform the CC framework that something went wrong
  * and it would be unsafe to allow the unload to proceed. However, there is no
  * way for this to occur with this implementation so we always return zero.
  */
 int
 tcp_ccalgounload(struct cc_algo *unload_algo)
 {
 	struct cc_algo *tmpalgo;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	/*
 	 * Check all active control blocks across all network stacks and change
 	 * any that are using "unload_algo" back to NewReno. If "unload_algo"
 	 * requires cleanup code to be run, call it.
 	 */
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		INP_INFO_WLOCK(&V_tcbinfo);
 		/*
 		 * New connections already part way through being initialised
 		 * with the CC algo we're removing will not race with this code
 		 * because the INP_INFO_WLOCK is held during initialisation. We
 		 * therefore don't enter the loop below until the connection
 		 * list has stabilised.
 		 */
 		LIST_FOREACH(inp, &V_tcb, inp_list) {
 			INP_WLOCK(inp);
 			/* Important to skip tcptw structs. */
 			if (!(inp->inp_flags & INP_TIMEWAIT) &&
 			    (tp = intotcpcb(inp)) != NULL) {
 				/*
 				 * By holding INP_WLOCK here, we are assured
 				 * that the connection is not currently
 				 * executing inside the CC module's functions
 				 * i.e. it is safe to make the switch back to
 				 * NewReno.
 				 */
 				if (CC_ALGO(tp) == unload_algo) {
 					tmpalgo = CC_ALGO(tp);
 					/* NewReno does not require any init. */
 					CC_ALGO(tp) = &newreno_cc_algo;
 					if (tmpalgo->cb_destroy != NULL)
 						tmpalgo->cb_destroy(tp->ccv);
 				}
 			}
 			INP_WUNLOCK(inp);
 		}
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
 
 	return (0);
 }
 
 /*
  * Drop a TCP connection, reporting
  * the specified error.  If connection is synchronized,
  * then send a RST to peer.
  */
 struct tcpcb *
 tcp_drop(struct tcpcb *tp, int errno)
 {
 	struct socket *so = tp->t_inpcb->inp_socket;
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (TCPS_HAVERCVDSYN(tp->t_state)) {
 		tcp_state_change(tp, TCPS_CLOSED);
 		(void) tp->t_fb->tfb_tcp_output(tp);
 		TCPSTAT_INC(tcps_drops);
 	} else
 		TCPSTAT_INC(tcps_conndrops);
 	if (errno == ETIMEDOUT && tp->t_softerror)
 		errno = tp->t_softerror;
 	so->so_error = errno;
 	return (tcp_close(tp));
 }
 
 void
 tcp_discardcb(struct tcpcb *tp)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 	int released;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Make sure that all of our timers are stopped before we delete the
 	 * PCB.
 	 *
 	 * If stopping a timer fails, we schedule a discard function in same
 	 * callout, and the last discard function called will take care of
 	 * deleting the tcpcb.
 	 */
 	tcp_timer_stop(tp, TT_REXMT);
 	tcp_timer_stop(tp, TT_PERSIST);
 	tcp_timer_stop(tp, TT_KEEP);
 	tcp_timer_stop(tp, TT_2MSL);
 	tcp_timer_stop(tp, TT_DELACK);
 	if (tp->t_fb->tfb_tcp_timer_stop_all) {
 		/* Call the stop-all function of the methods */
 		tp->t_fb->tfb_tcp_timer_stop_all(tp);
 	}
 
 	/*
 	 * If we got enough samples through the srtt filter,
 	 * save the rtt and rttvar in the routing entry.
 	 * 'Enough' is arbitrarily defined as 4 rtt samples.
 	 * 4 samples is enough for the srtt filter to converge
 	 * to within enough % of the correct value; fewer samples
 	 * and we could save a bogus rtt. The danger is not high
 	 * as tcp quickly recovers from everything.
 	 * XXX: Works very well but needs some more statistics!
 	 */
 	if (tp->t_rttupdated >= 4) {
 		struct hc_metrics_lite metrics;
 		u_long ssthresh;
 
 		bzero(&metrics, sizeof(metrics));
 		/*
 		 * Update the ssthresh always when the conditions below
 		 * are satisfied. This gives us better new start value
 		 * for the congestion avoidance for new connections.
 		 * ssthresh is only set if packet loss occured on a session.
 		 *
 		 * XXXRW: 'so' may be NULL here, and/or socket buffer may be
 		 * being torn down.  Ideally this code would not use 'so'.
 		 */
 		ssthresh = tp->snd_ssthresh;
 		if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
 			/*
 			 * convert the limit from user data bytes to
 			 * packets then to packet data bytes.
 			 */
 			ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg;
 			if (ssthresh < 2)
 				ssthresh = 2;
 			ssthresh *= (u_long)(tp->t_maxseg +
 #ifdef INET6
 			    (isipv6 ? sizeof (struct ip6_hdr) +
 				sizeof (struct tcphdr) :
 #endif
 				sizeof (struct tcpiphdr)
 #ifdef INET6
 			    )
 #endif
 			    );
 		} else
 			ssthresh = 0;
 		metrics.rmx_ssthresh = ssthresh;
 
 		metrics.rmx_rtt = tp->t_srtt;
 		metrics.rmx_rttvar = tp->t_rttvar;
 		metrics.rmx_cwnd = tp->snd_cwnd;
 		metrics.rmx_sendpipe = 0;
 		metrics.rmx_recvpipe = 0;
 
 		tcp_hc_update(&inp->inp_inc, &metrics);
 	}
 
 	/* free the reassembly queue, if any */
 	tcp_reass_flush(tp);
 
 #ifdef TCP_OFFLOAD
 	/* Disconnect offload device, if any. */
 	if (tp->t_flags & TF_TOE)
 		tcp_offload_detach(tp);
 #endif
 		
 	tcp_free_sackholes(tp);
 
 #ifdef TCPPCAP
 	/* Free the TCP PCAP queues. */
 	tcp_pcap_drain(&(tp->t_inpkts));
 	tcp_pcap_drain(&(tp->t_outpkts));
 #endif
 
 	/* Allow the CC algorithm to clean up after itself. */
 	if (CC_ALGO(tp)->cb_destroy != NULL)
 		CC_ALGO(tp)->cb_destroy(tp->ccv);
 
 	khelp_destroy_osd(tp->osd);
 
 	CC_ALGO(tp) = NULL;
 	inp->inp_ppcb = NULL;
 	if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
 		/* We own the last reference on tcpcb, let's free it. */
 		if ((tp->t_fb->tfb_tcp_timers_left) &&
 		    (tp->t_fb->tfb_tcp_timers_left(tp))) {
 			    /* Some fb timers left running! */
 			    return;
 		}
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		tp->t_inpcb = NULL;
 		uma_zfree(V_tcpcb_zone, tp);
 		released = in_pcbrele_wlocked(inp);
 		KASSERT(!released, ("%s: inp %p should not have been released "
 			"here", __func__, inp));
 	}
 }
 
 void
 tcp_timer_2msl_discard(void *xtp)
 {
 
 	tcp_timer_discard((struct tcpcb *)xtp, TT_2MSL);
 }
 
 void
 tcp_timer_keep_discard(void *xtp)
 {
 
 	tcp_timer_discard((struct tcpcb *)xtp, TT_KEEP);
 }
 
 void
 tcp_timer_persist_discard(void *xtp)
 {
 
 	tcp_timer_discard((struct tcpcb *)xtp, TT_PERSIST);
 }
 
 void
 tcp_timer_rexmt_discard(void *xtp)
 {
 
 	tcp_timer_discard((struct tcpcb *)xtp, TT_REXMT);
 }
 
 void
 tcp_timer_delack_discard(void *xtp)
 {
 
 	tcp_timer_discard((struct tcpcb *)xtp, TT_DELACK);
 }
 
 void
 tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type)
 {
 	struct inpcb *inp;
 
 	CURVNET_SET(tp->t_vnet);
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
 		__func__, tp));
 	INP_WLOCK(inp);
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
 		("%s: tcpcb has to be stopped here", __func__));
 	KASSERT((tp->t_timers->tt_flags & timer_type) != 0,
 		("%s: discard callout should be running", __func__));
 	tp->t_timers->tt_flags &= ~timer_type;
 	if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
 		/* We own the last reference on this tcpcb, let's free it. */
 		if ((tp->t_fb->tfb_tcp_timers_left) &&
 		    (tp->t_fb->tfb_tcp_timers_left(tp))) {
 			    /* Some fb timers left running! */
 			    goto leave;
 		}
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		tp->t_inpcb = NULL;
 		uma_zfree(V_tcpcb_zone, tp);
 		if (in_pcbrele_wlocked(inp)) {
 			INP_INFO_RUNLOCK(&V_tcbinfo);
 			CURVNET_RESTORE();
 			return;
 		}
 	}
 leave:
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 /*
  * Attempt to close a TCP control block, marking it as dropped, and freeing
  * the socket if we hold the only reference.
  */
 struct tcpcb *
 tcp_close(struct tcpcb *tp)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so;
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_state == TCPS_LISTEN)
 		tcp_offload_listen_stop(tp);
 #endif
 #ifdef TCP_RFC7413
 	/*
 	 * This releases the TFO pending counter resource for TFO listen
 	 * sockets as well as passively-created TFO sockets that transition
 	 * from SYN_RECEIVED to CLOSED.
 	 */
 	if (tp->t_tfo_pending) {
 		tcp_fastopen_decrement_counter(tp->t_tfo_pending);
 		tp->t_tfo_pending = NULL;
 	}
 #endif
 	in_pcbdrop(inp);
 	TCPSTAT_INC(tcps_closed);
 	KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
 	so = inp->inp_socket;
 	soisdisconnected(so);
 	if (inp->inp_flags & INP_SOCKREF) {
 		KASSERT(so->so_state & SS_PROTOREF,
 		    ("tcp_close: !SS_PROTOREF"));
 		inp->inp_flags &= ~INP_SOCKREF;
 		INP_WUNLOCK(inp);
 		ACCEPT_LOCK();
 		SOCK_LOCK(so);
 		so->so_state &= ~SS_PROTOREF;
 		sofree(so);
 		return (NULL);
 	}
 	return (tp);
 }
 
 void
 tcp_drain(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	if (!do_tcpdrain)
 		return;
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		struct inpcb *inpb;
 		struct tcpcb *tcpb;
 
 	/*
 	 * Walk the tcpbs, if existing, and flush the reassembly queue,
 	 * if there is one...
 	 * XXX: The "Net/3" implementation doesn't imply that the TCP
 	 *      reassembly queue should be flushed, but in a situation
 	 *	where we're really low on mbufs, this is potentially
 	 *	useful.
 	 */
 		INP_INFO_WLOCK(&V_tcbinfo);
 		LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
 			if (inpb->inp_flags & INP_TIMEWAIT)
 				continue;
 			INP_WLOCK(inpb);
 			if ((tcpb = intotcpcb(inpb)) != NULL) {
 				tcp_reass_flush(tcpb);
 				tcp_clean_sackreport(tcpb);
 			}
 			INP_WUNLOCK(inpb);
 		}
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Notify a tcp user of an asynchronous error;
  * store error as soft error, but wake up user
  * (for now, won't do anything until can select for soft error).
  *
  * Do not wake up user since there currently is no mechanism for
  * reporting soft errors (yet - a kqueue filter may be added).
  */
 static struct inpcb *
 tcp_notify(struct inpcb *inp, int error)
 {
 	struct tcpcb *tp;
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_flags & INP_TIMEWAIT) ||
 	    (inp->inp_flags & INP_DROPPED))
 		return (inp);
 
 	tp = intotcpcb(inp);
 	KASSERT(tp != NULL, ("tcp_notify: tp == NULL"));
 
 	/*
 	 * Ignore some errors if we are hooked up.
 	 * If connection hasn't completed, has retransmitted several times,
 	 * and receives a second error, give up now.  This is better
 	 * than waiting a long time to establish a connection that
 	 * can never complete.
 	 */
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
 	     error == EHOSTDOWN)) {
 		return (inp);
 	} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
 	    tp->t_softerror) {
 		tp = tcp_drop(tp, error);
 		if (tp != NULL)
 			return (inp);
 		else
 			return (NULL);
 	} else {
 		tp->t_softerror = error;
 		return (inp);
 	}
 #if 0
 	wakeup( &so->so_timeo);
 	sorwakeup(so);
 	sowwakeup(so);
 #endif
 }
 
 static int
 tcp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, m, n, pcb_count;
 	struct inpcb *inp, **inp_list;
 	inp_gen_t gencnt;
 	struct xinpgen xig;
 
 	/*
 	 * The process of preparing the TCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == NULL) {
 		n = V_tcbinfo.ipi_count + syncache_pcbcount();
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	INP_LIST_RLOCK(&V_tcbinfo);
 	gencnt = V_tcbinfo.ipi_gencnt;
 	n = V_tcbinfo.ipi_count;
 	INP_LIST_RUNLOCK(&V_tcbinfo);
 
 	m = syncache_pcbcount();
 
 	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
 		+ (n + m) * sizeof(struct xtcpcb));
 	if (error != 0)
 		return (error);
 
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n + m;
 	xig.xig_gen = gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	error = syncache_pcblist(req, m, &pcb_count);
 	if (error)
 		return (error);
 
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 	if (inp_list == NULL)
 		return (ENOMEM);
 
 	INP_INFO_WLOCK(&V_tcbinfo);
 	for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
 	    inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
 		INP_WLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			/*
 			 * XXX: This use of cr_cansee(), introduced with
 			 * TCP state changes, is not quite right, but for
 			 * now, better than nothing.
 			 */
 			if (inp->inp_flags & INP_TIMEWAIT) {
 				if (intotw(inp) != NULL)
 					error = cr_cansee(req->td->td_ucred,
 					    intotw(inp)->tw_cred);
 				else
 					error = EINVAL;	/* Skip this inp. */
 			} else
 				error = cr_canseeinpcb(req->td->td_ucred, inp);
 			if (error == 0) {
 				in_pcbref(inp);
 				inp_list[i++] = inp;
 			}
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(&V_tcbinfo);
 	n = i;
 
 	error = 0;
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			struct xtcpcb xt;
 			void *inp_ppcb;
 
 			bzero(&xt, sizeof(xt));
 			xt.xt_len = sizeof xt;
 			/* XXX should avoid extra copy */
 			bcopy(inp, &xt.xt_inp, sizeof *inp);
 			inp_ppcb = inp->inp_ppcb;
 			if (inp_ppcb == NULL)
 				bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
 			else if (inp->inp_flags & INP_TIMEWAIT) {
 				bzero((char *) &xt.xt_tp, sizeof xt.xt_tp);
 				xt.xt_tp.t_state = TCPS_TIME_WAIT;
 			} else {
 				bcopy(inp_ppcb, &xt.xt_tp, sizeof xt.xt_tp);
 				if (xt.xt_tp.t_timers)
 					tcp_timer_to_xtimer(&xt.xt_tp, xt.xt_tp.t_timers, &xt.xt_timer);
 			}
 			if (inp->inp_socket != NULL)
 				sotoxsocket(inp->inp_socket, &xt.xt_socket);
 			else {
 				bzero(&xt.xt_socket, sizeof xt.xt_socket);
 				xt.xt_socket.xso_protocol = IPPROTO_TCP;
 			}
 			xt.xt_inp.inp_gencnt = inp->inp_gencnt;
 			INP_RUNLOCK(inp);
 			error = SYSCTL_OUT(req, &xt, sizeof xt);
 		} else
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_RLOCK(&V_tcbinfo);
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (!in_pcbrele_rlocked(inp))
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.
 		 * If the generation differs from what we told
 		 * her before, she knows that something happened
 		 * while we were processing this request, and it
 		 * might be necessary to retry.
 		 */
 		INP_LIST_RLOCK(&V_tcbinfo);
 		xig.xig_gen = V_tcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
 		INP_LIST_RUNLOCK(&V_tcbinfo);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
     tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
 
 #ifdef INET
 static int
 tcp_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in addrs[2];
 	struct inpcb *inp;
 	int error;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
 	    addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL);
 	if (inp != NULL) {
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseeinpcb(req->td->td_ucred, inp);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     tcp_getcred, "S,xucred", "Get the xucred of a TCP connection");
 #endif /* INET */
 
 #ifdef INET6
 static int
 tcp6_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in6 addrs[2];
 	struct inpcb *inp;
 	int error;
 #ifdef INET
 	int mapped = 0;
 #endif
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
 	    (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
 		return (error);
 	}
 	if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
 #ifdef INET
 		if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
 			mapped = 1;
 		else
 #endif
 			return (EINVAL);
 	}
 
 #ifdef INET
 	if (mapped == 1)
 		inp = in_pcblookup(&V_tcbinfo,
 			*(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
 			addrs[1].sin6_port,
 			*(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
 			addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL);
 	else
 #endif
 		inp = in6_pcblookup(&V_tcbinfo,
 			&addrs[1].sin6_addr, addrs[1].sin6_port,
 			&addrs[0].sin6_addr, addrs[0].sin6_port,
 			INPLOOKUP_RLOCKPCB, NULL);
 	if (inp != NULL) {
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseeinpcb(req->td->td_ucred, inp);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection");
 #endif /* INET6 */
 
 
 #ifdef INET
 void
 tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct ip *ip = vip;
 	struct tcphdr *th;
 	struct in_addr faddr;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
 	struct icmp *icp;
 	struct in_conninfo inc;
 	tcp_seq icmp_tcp_seq;
 	int mtu;
 
 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
 		return;
 
 	if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc_notify;
 	else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
 		cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip)
 		notify = tcp_drop_syn_sent;
 	/*
 	 * Redirects don't need to be handled up here.
 	 */
 	else if (PRC_IS_REDIRECT(cmd))
 		return;
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
 	 * XXX: We never get this from ICMP, otherwise it makes an
 	 * excellent DoS attack on machines with many connections.
 	 */
 	else if (cmd == PRC_HOSTDEAD)
 		ip = NULL;
 	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
 		return;
 
 	if (ip == NULL) {
 		in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
 		return;
 	}
 
 	icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip));
 	th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src,
 	    th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
 	if (inp != NULL)  {
 		if (!(inp->inp_flags & INP_TIMEWAIT) &&
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			icmp_tcp_seq = ntohl(th->th_seq);
 			tp = intotcpcb(inp);
 			if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) &&
 			    SEQ_LT(icmp_tcp_seq, tp->snd_max)) {
 				if (cmd == PRC_MSGSIZE) {
 					/*
 					 * MTU discovery:
 					 * If we got a needfrag set the MTU
 					 * in the route to the suggested new
 					 * value (if given) and then notify.
 					 */
 				    	mtu = ntohs(icp->icmp_nextmtu);
 					/*
 					 * If no alternative MTU was
 					 * proposed, try the next smaller
 					 * one.
 					 */
 					if (!mtu)
 						mtu = ip_next_mtu(
 						    ntohs(ip->ip_len), 1);
 					if (mtu < V_tcp_minmss +
 					    sizeof(struct tcpiphdr))
 						mtu = V_tcp_minmss +
 						    sizeof(struct tcpiphdr);
 					/*
 					 * Only process the offered MTU if it
 					 * is smaller than the current one.
 					 */
-					if (mtu < tp->t_maxopd +
+					if (mtu < tp->t_maxseg +
 					    sizeof(struct tcpiphdr)) {
 						bzero(&inc, sizeof(inc));
 						inc.inc_faddr = faddr;
 						inc.inc_fibnum =
 						    inp->inp_inc.inc_fibnum;
 						tcp_hc_updatemtu(&inc, mtu);
 						tcp_mtudisc(inp, mtu);
 					}
 				} else
 					inp = (*notify)(inp,
 					    inetctlerrmap[cmd]);
 			}
 		}
 		if (inp != NULL)
 			INP_WUNLOCK(inp);
 	} else {
 		bzero(&inc, sizeof(inc));
 		inc.inc_fport = th->th_dport;
 		inc.inc_lport = th->th_sport;
 		inc.inc_faddr = faddr;
 		inc.inc_laddr = ip->ip_src;
 		syncache_unreach(&inc, th);
 	}
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 #endif /* INET */
 
 #ifdef INET6
 void
 tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 	struct tcphdr th;
 	struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
 	struct ip6_hdr *ip6;
 	struct mbuf *m;
 	struct ip6ctlparam *ip6cp = NULL;
 	const struct sockaddr_in6 *sa6_src = NULL;
 	int off;
 	struct tcp_portonly {
 		u_int16_t th_sport;
 		u_int16_t th_dport;
 	} *thp;
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
 		return;
 
 	if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc_notify;
 	else if (!PRC_IS_REDIRECT(cmd) &&
 		 ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0))
 		return;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
 		ip6cp = (struct ip6ctlparam *)d;
 		m = ip6cp->ip6c_m;
 		ip6 = ip6cp->ip6c_ip6;
 		off = ip6cp->ip6c_off;
 		sa6_src = ip6cp->ip6c_src;
 	} else {
 		m = NULL;
 		ip6 = NULL;
 		off = 0;	/* fool gcc */
 		sa6_src = &sa6_any;
 	}
 
 	if (ip6 != NULL) {
 		struct in_conninfo inc;
 		/*
 		 * XXX: We assume that when IPV6 is non NULL,
 		 * M and OFF are valid.
 		 */
 
 		/* check if we can safely examine src and dst ports */
 		if (m->m_pkthdr.len < off + sizeof(*thp))
 			return;
 
 		bzero(&th, sizeof(th));
 		m_copydata(m, off, sizeof(*thp), (caddr_t)&th);
 
 		in6_pcbnotify(&V_tcbinfo, sa, th.th_dport,
 		    (struct sockaddr *)ip6cp->ip6c_src,
 		    th.th_sport, cmd, NULL, notify);
 
 		bzero(&inc, sizeof(inc));
 		inc.inc_fport = th.th_dport;
 		inc.inc_lport = th.th_sport;
 		inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
 		inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
 		inc.inc_flags |= INC_ISIPV6;
 		INP_INFO_RLOCK(&V_tcbinfo);
 		syncache_unreach(&inc, &th);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	} else
 		in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
 			      0, cmd, NULL, notify);
 }
 #endif /* INET6 */
 
 
 /*
  * Following is where TCP initial sequence number generation occurs.
  *
  * There are two places where we must use initial sequence numbers:
  * 1.  In SYN-ACK packets.
  * 2.  In SYN packets.
  *
  * All ISNs for SYN-ACK packets are generated by the syncache.  See
  * tcp_syncache.c for details.
  *
  * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling
  * depends on this property.  In addition, these ISNs should be
  * unguessable so as to prevent connection hijacking.  To satisfy
  * the requirements of this situation, the algorithm outlined in
  * RFC 1948 is used, with only small modifications.
  *
  * Implementation details:
  *
  * Time is based off the system timer, and is corrected so that it
  * increases by one megabyte per second.  This allows for proper
  * recycling on high speed LANs while still leaving over an hour
  * before rollover.
  *
  * As reading the *exact* system time is too expensive to be done
  * whenever setting up a TCP connection, we increment the time
  * offset in two ways.  First, a small random positive increment
  * is added to isn_offset for each connection that is set up.
  * Second, the function tcp_isn_tick fires once per clock tick
  * and increments isn_offset as necessary so that sequence numbers
  * are incremented at approximately ISN_BYTES_PER_SECOND.  The
  * random positive increments serve only to ensure that the same
  * exact sequence number is never sent out twice (as could otherwise
  * happen when a port is recycled in less than the system tick
  * interval.)
  *
  * net.inet.tcp.isn_reseed_interval controls the number of seconds
  * between seeding of isn_secret.  This is normally set to zero,
  * as reseeding should not be necessary.
  *
  * Locking of the global variables isn_secret, isn_last_reseed, isn_offset,
  * isn_offset_old, and isn_ctx is performed using the TCP pcbinfo lock.  In
  * general, this means holding an exclusive (write) lock.
  */
 
 #define ISN_BYTES_PER_SECOND 1048576
 #define ISN_STATIC_INCREMENT 4096
 #define ISN_RANDOM_INCREMENT (4096 - 1)
 
 static VNET_DEFINE(u_char, isn_secret[32]);
 static VNET_DEFINE(int, isn_last);
 static VNET_DEFINE(int, isn_last_reseed);
 static VNET_DEFINE(u_int32_t, isn_offset);
 static VNET_DEFINE(u_int32_t, isn_offset_old);
 
 #define	V_isn_secret			VNET(isn_secret)
 #define	V_isn_last			VNET(isn_last)
 #define	V_isn_last_reseed		VNET(isn_last_reseed)
 #define	V_isn_offset			VNET(isn_offset)
 #define	V_isn_offset_old		VNET(isn_offset_old)
 
 tcp_seq
 tcp_new_isn(struct tcpcb *tp)
 {
 	MD5_CTX isn_ctx;
 	u_int32_t md5_buffer[4];
 	tcp_seq new_isn;
 	u_int32_t projected_offset;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	ISN_LOCK();
 	/* Seed if this is the first use, reseed if requested. */
 	if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) &&
 	     (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz)
 		< (u_int)ticks))) {
 		read_random(&V_isn_secret, sizeof(V_isn_secret));
 		V_isn_last_reseed = ticks;
 	}
 
 	/* Compute the md5 hash and return the ISN. */
 	MD5Init(&isn_ctx);
 	MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short));
 	MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short));
 #ifdef INET6
 	if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) {
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr,
 			  sizeof(struct in6_addr));
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr,
 			  sizeof(struct in6_addr));
 	} else
 #endif
 	{
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr,
 			  sizeof(struct in_addr));
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr,
 			  sizeof(struct in_addr));
 	}
 	MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret));
 	MD5Final((u_char *) &md5_buffer, &isn_ctx);
 	new_isn = (tcp_seq) md5_buffer[0];
 	V_isn_offset += ISN_STATIC_INCREMENT +
 		(arc4random() & ISN_RANDOM_INCREMENT);
 	if (ticks != V_isn_last) {
 		projected_offset = V_isn_offset_old +
 		    ISN_BYTES_PER_SECOND / hz * (ticks - V_isn_last);
 		if (SEQ_GT(projected_offset, V_isn_offset))
 			V_isn_offset = projected_offset;
 		V_isn_offset_old = V_isn_offset;
 		V_isn_last = ticks;
 	}
 	new_isn += V_isn_offset;
 	ISN_UNLOCK();
 	return (new_isn);
 }
 
 /*
  * When a specific ICMP unreachable message is received and the
  * connection state is SYN-SENT, drop the connection.  This behavior
  * is controlled by the icmp_may_rst sysctl.
  */
 struct inpcb *
 tcp_drop_syn_sent(struct inpcb *inp, int errno)
 {
 	struct tcpcb *tp;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_flags & INP_TIMEWAIT) ||
 	    (inp->inp_flags & INP_DROPPED))
 		return (inp);
 
 	tp = intotcpcb(inp);
 	if (tp->t_state != TCPS_SYN_SENT)
 		return (inp);
 
 	tp = tcp_drop(tp, errno);
 	if (tp != NULL)
 		return (inp);
 	else
 		return (NULL);
 }
 
 /*
  * When `need fragmentation' ICMP is received, update our idea of the MSS
  * based on the new value. Also nudge TCP to send something, since we
  * know the packet we just sent was dropped.
  * This duplicates some code in the tcp_mss() function in tcp_input.c.
  */
 static struct inpcb *
 tcp_mtudisc_notify(struct inpcb *inp, int error)
 {
 
 	tcp_mtudisc(inp, -1);
 	return (inp);
 }
 
 static void
 tcp_mtudisc(struct inpcb *inp, int mtuoffer)
 {
 	struct tcpcb *tp;
 	struct socket *so;
 
 	INP_WLOCK_ASSERT(inp);
 	if ((inp->inp_flags & INP_TIMEWAIT) ||
 	    (inp->inp_flags & INP_DROPPED))
 		return;
 
 	tp = intotcpcb(inp);
 	KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL"));
 
 	tcp_mss_update(tp, -1, mtuoffer, NULL, NULL);
   
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_snd);
 	/* If the mss is larger than the socket buffer, decrease the mss. */
 	if (so->so_snd.sb_hiwat < tp->t_maxseg)
 		tp->t_maxseg = so->so_snd.sb_hiwat;
 	SOCKBUF_UNLOCK(&so->so_snd);
 
 	TCPSTAT_INC(tcps_mturesent);
 	tp->t_rtttime = 0;
 	tp->snd_nxt = tp->snd_una;
 	tcp_free_sackholes(tp);
 	tp->snd_recover = tp->snd_max;
 	if (tp->t_flags & TF_SACK_PERMIT)
 		EXIT_FASTRECOVERY(tp->t_flags);
 	tp->t_fb->tfb_tcp_output(tp);
 }
 
 #ifdef INET
 /*
  * Look-up the routing entry to the peer of this inpcb.  If no route
  * is found and it cannot be allocated, then return 0.  This routine
  * is called by TCP routines that access the rmx structure and by
  * tcp_mss_update to get the peer/interface MTU.
  */
 u_long
 tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
 {
 	struct route sro;
 	struct sockaddr_in *dst;
 	struct ifnet *ifp;
 	u_long maxmtu = 0;
 
 	KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer"));
 
 	bzero(&sro, sizeof(sro));
 	if (inc->inc_faddr.s_addr != INADDR_ANY) {
 	        dst = (struct sockaddr_in *)&sro.ro_dst;
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = inc->inc_faddr;
 		in_rtalloc_ign(&sro, 0, inc->inc_fibnum);
 	}
 	if (sro.ro_rt != NULL) {
 		ifp = sro.ro_rt->rt_ifp;
 		if (sro.ro_rt->rt_mtu == 0)
 			maxmtu = ifp->if_mtu;
 		else
 			maxmtu = min(sro.ro_rt->rt_mtu, ifp->if_mtu);
 
 		/* Report additional interface capabilities. */
 		if (cap != NULL) {
 			if (ifp->if_capenable & IFCAP_TSO4 &&
 			    ifp->if_hwassist & CSUM_TSO) {
 				cap->ifcap |= CSUM_TSO;
 				cap->tsomax = ifp->if_hw_tsomax;
 				cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
 				cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
 			}
 		}
 		RTFREE(sro.ro_rt);
 	}
 	return (maxmtu);
 }
 #endif /* INET */
 
 #ifdef INET6
 u_long
 tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
 {
 	struct route_in6 sro6;
 	struct ifnet *ifp;
 	u_long maxmtu = 0;
 
 	KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
 
 	bzero(&sro6, sizeof(sro6));
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
 		sro6.ro_dst.sin6_family = AF_INET6;
 		sro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6);
 		sro6.ro_dst.sin6_addr = inc->inc6_faddr;
 		in6_rtalloc_ign(&sro6, 0, inc->inc_fibnum);
 	}
 	if (sro6.ro_rt != NULL) {
 		ifp = sro6.ro_rt->rt_ifp;
 		if (sro6.ro_rt->rt_mtu == 0)
 			maxmtu = IN6_LINKMTU(sro6.ro_rt->rt_ifp);
 		else
 			maxmtu = min(sro6.ro_rt->rt_mtu,
 				     IN6_LINKMTU(sro6.ro_rt->rt_ifp));
 
 		/* Report additional interface capabilities. */
 		if (cap != NULL) {
 			if (ifp->if_capenable & IFCAP_TSO6 &&
 			    ifp->if_hwassist & CSUM_TSO) {
 				cap->ifcap |= CSUM_TSO;
 				cap->tsomax = ifp->if_hw_tsomax;
 				cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
 				cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
 			}
 		}
 		RTFREE(sro6.ro_rt);
 	}
 
 	return (maxmtu);
 }
 #endif /* INET6 */
+
+/*
+ * Calculate the effective SMSS (as defined in RFC 5681) for a given TCP
+ * connection in its current state, accounting for SACK and other options.
+ */
+u_int
+tcp_maxseg(const struct tcpcb *tp)
+{
+	u_int optlen;
+
+	if (tp->t_flags & TF_NOOPT)
+		return (tp->t_maxseg);
+
+	/*
+	 * This is a simplified version of the logic in tcp_addoptions(),
+	 * without a proper loop and with most of the padding hardcoded.
+	 * The padding may be miscalculated in some edge cases, but that
+	 * is harmless, since the result of tcp_maxseg() is used only in
+	 * cwnd and ssthresh estimations.
+	 */
+#define	PAD(len)	((((len) / 4) + !!((len) % 4)) * 4)
+	if (TCPS_HAVEESTABLISHED(tp->t_state)) {
+		if (tp->t_flags & TF_RCVD_TSTMP)
+			optlen = TCPOLEN_TSTAMP_APPA;
+		else
+			optlen = 0;
+#ifdef TCP_SIGNATURE
+		if (tp->t_flags & TF_SIGNATURE)
+			optlen += PAD(TCPOLEN_SIGNATURE);
+#endif
+		if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) {
+			optlen += TCPOLEN_SACKHDR;
+			optlen += tp->rcv_numsacks * TCPOLEN_SACK;
+			optlen = PAD(optlen);
+		}
+	} else {
+		if (tp->t_flags & TF_REQ_TSTMP)
+			optlen = TCPOLEN_TSTAMP_APPA;
+		else
+			optlen = PAD(TCPOLEN_MAXSEG);
+		if (tp->t_flags & TF_REQ_SCALE)
+			optlen += PAD(TCPOLEN_WINDOW);
+#ifdef TCP_SIGNATURE
+		if (tp->t_flags & TF_SIGNATURE)
+			optlen += PAD(TCPOLEN_SIGNATURE);
+#endif
+		if (tp->t_flags & TF_SACK_PERMIT)
+			optlen += PAD(TCPOLEN_SACK_PERMITTED);
+	}
+#undef PAD
+	optlen = min(optlen, TCP_MAXOLEN);
+	return (tp->t_maxseg - optlen);
+}
 
 #ifdef IPSEC
 /* compute ESP/AH header size for TCP, including outer IP header. */
 size_t
 ipsec_hdrsiz_tcp(struct tcpcb *tp)
 {
 	struct inpcb *inp;
 	struct mbuf *m;
 	size_t hdrsiz;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 #endif
 	struct tcphdr *th;
 
 	if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL) ||
 		(!key_havesp(IPSEC_DIR_OUTBOUND)))
 		return (0);
 	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (!m)
 		return (0);
 
 #ifdef INET6
 	if ((inp->inp_vflag & INP_IPV6) != 0) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		th = (struct tcphdr *)(ip6 + 1);
 		m->m_pkthdr.len = m->m_len =
 			sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 		tcpip_fillheaders(inp, ip6, th);
 		hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
 	} else
 #endif /* INET6 */
 	{
 		ip = mtod(m, struct ip *);
 		th = (struct tcphdr *)(ip + 1);
 		m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr);
 		tcpip_fillheaders(inp, ip, th);
 		hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
 	}
 
 	m_free(m);
 	return (hdrsiz);
 }
 #endif /* IPSEC */
 
 #ifdef TCP_SIGNATURE
 /*
  * Callback function invoked by m_apply() to digest TCP segment data
  * contained within an mbuf chain.
  */
 static int
 tcp_signature_apply(void *fstate, void *data, u_int len)
 {
 
 	MD5Update(fstate, (u_char *)data, len);
 	return (0);
 }
 
 /*
  * XXX The key is retrieved from the system's PF_KEY SADB, by keying a
  * search with the destination IP address, and a 'magic SPI' to be
  * determined by the application. This is hardcoded elsewhere to 1179
 */
 struct secasvar *
 tcp_get_sav(struct mbuf *m, u_int direction)
 {
 	union sockaddr_union dst;
 	struct secasvar *sav;
 	struct ip *ip;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	char ip6buf[INET6_ADDRSTRLEN];
 #endif
 
 	/* Extract the destination from the IP header in the mbuf. */
 	bzero(&dst, sizeof(union sockaddr_union));
 	ip = mtod(m, struct ip *);
 #ifdef INET6
 	ip6 = NULL;	/* Make the compiler happy. */
 #endif
 	switch (ip->ip_v) {
 #ifdef INET
 	case IPVERSION:
 		dst.sa.sa_len = sizeof(struct sockaddr_in);
 		dst.sa.sa_family = AF_INET;
 		dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ?
 		    ip->ip_src : ip->ip_dst;
 		break;
 #endif
 #ifdef INET6
 	case (IPV6_VERSION >> 4):
 		ip6 = mtod(m, struct ip6_hdr *);
 		dst.sa.sa_len = sizeof(struct sockaddr_in6);
 		dst.sa.sa_family = AF_INET6;
 		dst.sin6.sin6_addr = (direction == IPSEC_DIR_INBOUND) ?
 		    ip6->ip6_src : ip6->ip6_dst;
 		break;
 #endif
 	default:
 		return (NULL);
 		/* NOTREACHED */
 		break;
 	}
 
 	/* Look up an SADB entry which matches the address of the peer. */
 	sav = KEY_ALLOCSA(&dst, IPPROTO_TCP, htonl(TCP_SIG_SPI));
 	if (sav == NULL) {
 		ipseclog((LOG_ERR, "%s: SADB lookup failed for %s\n", __func__,
 		    (ip->ip_v == IPVERSION) ? inet_ntoa(dst.sin.sin_addr) :
 #ifdef INET6
 			(ip->ip_v == (IPV6_VERSION >> 4)) ?
 			    ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) :
 #endif
 			"(unsupported)"));
 	}
 
 	return (sav);
 }
 
 /*
  * Compute TCP-MD5 hash of a TCP segment. (RFC2385)
  *
  * Parameters:
  * m		pointer to head of mbuf chain
  * len		length of TCP segment data, excluding options
  * optlen	length of TCP segment options
  * buf		pointer to storage for computed MD5 digest
- * sav		pointer to security assosiation
+ * sav		pointer to security association
  *
  * We do this over ip, tcphdr, segment data, and the key in the SADB.
  * When called from tcp_input(), we can be sure that th_sum has been
  * zeroed out and verified already.
  *
  * Releases reference to SADB key before return. 
  *
  * Return 0 if successful, otherwise return -1.
  *
  */
 int
 tcp_signature_do_compute(struct mbuf *m, int len, int optlen,
     u_char *buf, struct secasvar *sav)
 {
 #ifdef INET
 	struct ippseudo ippseudo;
 #endif
 	MD5_CTX ctx;
 	int doff;
 	struct ip *ip;
 #ifdef INET
 	struct ipovly *ipovly;
 #endif
 	struct tcphdr *th;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	struct in6_addr in6;
 	uint32_t plen;
-	uint16_t nhdr;
+	uint8_t nhdr;		/* hashed one byte at a time */
 #endif
 	u_short savecsum;
 
 	KASSERT(m != NULL, ("NULL mbuf chain"));
 	KASSERT(buf != NULL, ("NULL signature pointer"));
 
-	/* Extract the destination from the IP header in the mbuf. */
+	/* View the start of the mbuf as an IPv4 and as an IPv6 header. */
 	ip = mtod(m, struct ip *);
 #ifdef INET6
-	ip6 = NULL;	/* Make the compiler happy. */
+	ip6 = mtod(m, struct ip6_hdr *);
 #endif
 
 	MD5Init(&ctx);
 	/*
 	 * Step 1: Update MD5 hash with IP(v6) pseudo-header.
 	 *
 	 * XXX The ippseudo header MUST be digested in network byte order,
 	 * or else we'll fail the regression test. Assume all fields we've
 	 * been doing arithmetic on have been in host byte order.
 	 * XXX One cannot depend on ipovly->ih_len here. When called from
 	 * tcp_output(), the underlying ip_len member has not yet been set.
 	 */
 	switch (ip->ip_v) {
 #ifdef INET
 	case IPVERSION:
 		ipovly = (struct ipovly *)ip;
 		ippseudo.ippseudo_src = ipovly->ih_src;
 		ippseudo.ippseudo_dst = ipovly->ih_dst;
 		ippseudo.ippseudo_pad = 0;
 		ippseudo.ippseudo_p = IPPROTO_TCP;
 		ippseudo.ippseudo_len = htons(len + sizeof(struct tcphdr) +
 		    optlen);
 		MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ippseudo));
 
 		th = (struct tcphdr *)((u_char *)ip + sizeof(struct ip));
 		doff = sizeof(struct ip) + sizeof(struct tcphdr) + optlen;
 		break;
 #endif
 #ifdef INET6
 	/*
 	 * RFC 2385, 2.0  Proposal
 	 * For IPv6, the pseudo-header is as described in RFC 2460, namely the
 	 * 128-bit source IPv6 address, 128-bit destination IPv6 address, zero-
 	 * extended next header value (to form 32 bits), and 32-bit segment
 	 * length.
 	 * Note: Upper-Layer Packet Length comes before Next Header.
 	 */
 	case (IPV6_VERSION >> 4):
 		in6 = ip6->ip6_src;
 		in6_clearscope(&in6);
 		MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr));
 		in6 = ip6->ip6_dst;
 		in6_clearscope(&in6);
 		MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr));
 		plen = htonl(len + sizeof(struct tcphdr) + optlen);
 		MD5Update(&ctx, (char *)&plen, sizeof(uint32_t));
 		nhdr = 0;
 		MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t));
 		MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t));
 		MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t));
 		nhdr = IPPROTO_TCP;
 		MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t));
 
 		th = (struct tcphdr *)((u_char *)ip6 + sizeof(struct ip6_hdr));
 		doff = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + optlen;
 		break;
 #endif
 	default:
 		KEY_FREESAV(&sav);
 		return (-1);
 		/* NOTREACHED */
 		break;
 	}
 
 
 	/*
 	 * Step 2: Update MD5 hash with TCP header, excluding options.
 	 * The TCP checksum must be set to zero.
 	 */
 	savecsum = th->th_sum;
 	th->th_sum = 0;
 	MD5Update(&ctx, (char *)th, sizeof(struct tcphdr));
 	th->th_sum = savecsum;
 
 	/*
 	 * Step 3: Update MD5 hash with TCP segment data.
 	 *         Use m_apply() to avoid an early m_pullup().
 	 */
 	if (len > 0)
 		m_apply(m, doff, len, tcp_signature_apply, &ctx);
 
 	/*
 	 * Step 4: Update MD5 hash with shared secret.
 	 */
 	MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth));
 	MD5Final(buf, &ctx);
 
 	key_sa_recordxfer(sav, m);
 	KEY_FREESAV(&sav);
 	return (0);
 }
 
 /*
  * Compute TCP-MD5 hash of a TCP segment. (RFC2385)
  *
  * Return 0 if successful, otherwise return -1.
  */
 int
 tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen,
     u_char *buf, u_int direction)
 {
 	struct secasvar *sav;
 
 	if ((sav = tcp_get_sav(m, direction)) == NULL)
 		return (-1);
 
 	return (tcp_signature_do_compute(m, len, optlen, buf, sav));
 }
 
 /*
  * Verify the TCP-MD5 hash of a TCP segment. (RFC2385)
  *
  * Parameters:
  * m		pointer to head of mbuf chain
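- * len		length of TCP segment data, excluding options
- * optlen	length of TCP segment options
- * buf		pointer to storage for computed MD5 digest
- * direction	direction of flow (IPSEC_DIR_INBOUND or OUTBOUND)
+ * off0, tlen, optlen	passed on to tcp_signature_compute() unchanged
+ * to		parsed TCP options; to_signature is checked against the digest
+ * th		TCP header (not referenced by this function)
+ * tcpbflag	flags word; TF_SIGNATURE set means a signature is expected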
  *
  * Return 1 if successful, otherwise return 0.
  */
 int
 tcp_signature_verify(struct mbuf *m, int off0, int tlen, int optlen,
     struct tcpopt *to, struct tcphdr *th, u_int tcpbflag)
 {
 	char tmpdigest[TCP_SIGLEN];
 
 	if (tcp_sig_checksigs == 0)
 		return (1);
 	if ((tcpbflag & TF_SIGNATURE) == 0) {
 		if ((to->to_flags & TOF_SIGNATURE) != 0) {
 
 			/*
 			 * If this socket is not expecting signature but
 			 * the segment contains signature just fail.
 			 */
 			TCPSTAT_INC(tcps_sig_err_sigopt);
 			TCPSTAT_INC(tcps_sig_rcvbadsig);
 			return (0);
 		}
 
 		/* Signature is not expected, and not present in segment. */
 		return (1);
 	}
 
 	/*
 	 * If this socket is expecting signature but the segment does not
 	 * contain any just fail.
 	 */
 	if ((to->to_flags & TOF_SIGNATURE) == 0) {
 		TCPSTAT_INC(tcps_sig_err_nosigopt);
 		TCPSTAT_INC(tcps_sig_rcvbadsig);
 		return (0);
 	}
 	if (tcp_signature_compute(m, off0, tlen, optlen, &tmpdigest[0],
 	    IPSEC_DIR_INBOUND) == -1) {
 		TCPSTAT_INC(tcps_sig_err_buildsig);
 		TCPSTAT_INC(tcps_sig_rcvbadsig);
 		return (0);
 	}
 	
 	if (bcmp(to->to_signature, &tmpdigest[0], TCP_SIGLEN) != 0) {
 		TCPSTAT_INC(tcps_sig_rcvbadsig);
 		return (0);
 	}
 	TCPSTAT_INC(tcps_sig_rcvgoodsig);
 	return (1);
 }
 #endif /* TCP_SIGNATURE */
 
 static int
 sysctl_drop(SYSCTL_HANDLER_ARGS)
 {
 	/* addrs[0] is a foreign socket, addrs[1] is a local one. */
 	struct sockaddr_storage addrs[2];
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct tcptw *tw;
 	struct sockaddr_in *fin, *lin;
 #ifdef INET6
 	struct sockaddr_in6 *fin6, *lin6;
 #endif
 	int error;
 
 	inp = NULL;
 	fin = lin = NULL;
 #ifdef INET6
 	fin6 = lin6 = NULL;
 #endif
 	error = 0;
 
 	if (req->oldptr != NULL || req->oldlen != 0)
 		return (EINVAL);
 	if (req->newptr == NULL)
 		return (EPERM);
 	if (req->newlen < sizeof(addrs))
 		return (ENOMEM);
 	error = SYSCTL_IN(req, &addrs, sizeof(addrs));
 	if (error)
 		return (error);
 
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		fin6 = (struct sockaddr_in6 *)&addrs[0];
 		lin6 = (struct sockaddr_in6 *)&addrs[1];
 		if (fin6->sin6_len != sizeof(struct sockaddr_in6) ||
 		    lin6->sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 		if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
 			if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
 				return (EINVAL);
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]);
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]);
 			fin = (struct sockaddr_in *)&addrs[0];
 			lin = (struct sockaddr_in *)&addrs[1];
 			break;
 		}
 		error = sa6_embedscope(fin6, V_ip6_use_defzone);
 		if (error)
 			return (error);
 		error = sa6_embedscope(lin6, V_ip6_use_defzone);
 		if (error)
 			return (error);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		fin = (struct sockaddr_in *)&addrs[0];
 		lin = (struct sockaddr_in *)&addrs[1];
 		if (fin->sin_len != sizeof(struct sockaddr_in) ||
 		    lin->sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 		break;
 #endif
 	default:
 		return (EINVAL);
 	}
 	INP_INFO_RLOCK(&V_tcbinfo);
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr,
 		    fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
 		    INPLOOKUP_WLOCKPCB, NULL);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port,
 		    lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL);
 		break;
 #endif
 	}
 	if (inp != NULL) {
 		if (inp->inp_flags & INP_TIMEWAIT) {
 			/*
 			 * XXXRW: There currently exists a state where an
 			 * inpcb is present, but its timewait state has been
 			 * discarded.  For now, don't allow dropping of this
 			 * type of inpcb.
 			 */
 			tw = intotw(inp);
 			if (tw != NULL)
 				tcp_twclose(tw, 0);
 			else
 				INP_WUNLOCK(inp);
 		} else if (!(inp->inp_flags & INP_DROPPED) &&
 			   !(inp->inp_socket->so_options & SO_ACCEPTCONN)) {
 			tp = intotcpcb(inp);
 			tp = tcp_drop(tp, ECONNABORTED);
 			if (tp != NULL)
 				INP_WUNLOCK(inp);
 		} else
 			INP_WUNLOCK(inp);
 	} else
 		error = ESRCH;
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
     CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL,
     0, sysctl_drop, "", "Drop TCP connection");
 
 /*
  * Generate a standardized TCP log line for use throughout the
  * tcp subsystem.  Memory allocation is done with M_NOWAIT to
  * allow use in the interrupt context.
  *
  * NB: The caller MUST free(s, M_TCPLOG) the returned string.
  * NB: The function may return NULL if memory allocation failed.
  *
  * Due to header inclusion and ordering limitations the struct ip
  * and ip6_hdr pointers have to be passed as void pointers.
  */
 char *
 tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 
 	/* Is logging enabled? */
 	if (tcp_log_in_vain == 0)
 		return (NULL);
 
 	return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
 }
 
 char *
 tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 
 	/* Is logging enabled? */
 	if (tcp_log_debug == 0)
 		return (NULL);
 
 	return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
 }
 
 static char *
 tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 	char *s, *sp;
 	size_t size;
 	struct ip *ip;
 #ifdef INET6
 	const struct ip6_hdr *ip6;
 
 	ip6 = (const struct ip6_hdr *)ip6hdr;
 #endif /* INET6 */
 	ip = (struct ip *)ip4hdr;
 
 	/*
 	 * The log line looks like this:
 	 * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2<SYN>"
 	 */
 	size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") +
 	    sizeof(PRINT_TH_FLAGS) + 1 +
 #ifdef INET6
 	    2 * INET6_ADDRSTRLEN;
 #else
 	    2 * INET_ADDRSTRLEN;
 #endif /* INET6 */
 
 	s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT);
 	if (s == NULL)
 		return (NULL);
 
 	strcat(s, "TCP: [");
 	sp = s + strlen(s);
 
 	if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) {
 		inet_ntoa_r(inc->inc_faddr, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
 		sp = s + strlen(s);
 		inet_ntoa_r(inc->inc_laddr, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(inc->inc_lport));
 #ifdef INET6
 	} else if (inc) {
 		ip6_sprintf(sp, &inc->inc6_faddr);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
 		sp = s + strlen(s);
 		ip6_sprintf(sp, &inc->inc6_laddr);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(inc->inc_lport));
 	} else if (ip6 && th) {
 		ip6_sprintf(sp, &ip6->ip6_src);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(th->th_sport));
 		sp = s + strlen(s);
 		ip6_sprintf(sp, &ip6->ip6_dst);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(th->th_dport));
 #endif /* INET6 */
 #ifdef INET
 	} else if (ip && th) {
 		inet_ntoa_r(ip->ip_src, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(th->th_sport));
 		sp = s + strlen(s);
 		inet_ntoa_r(ip->ip_dst, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(th->th_dport));
 #endif /* INET */
 	} else {
 		free(s, M_TCPLOG);
 		return (NULL);
 	}
 	sp = s + strlen(s);
 	if (th)
 		sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS);
 	if (*(s + size - 1) != '\0')
 		panic("%s: string too long", __func__);
 	return (s);
 }
 
 /*
  * A subroutine which makes it easy to track TCP state changes with DTrace.
  * This function shouldn't be called for t_state initializations that don't
  * correspond to actual TCP state transitions.
  */
 void
 tcp_state_change(struct tcpcb *tp, int newstate)
 {
 #if defined(KDTRACE_HOOKS)
 	int pstate = tp->t_state;
 #endif
 
 	tp->t_state = newstate;
 	TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate);
 }
Index: projects/release-pkg/sys/netinet/tcp_timer.c
===================================================================
--- projects/release-pkg/sys/netinet/tcp_timer.c	(revision 293335)
+++ projects/release-pkg/sys/netinet/tcp_timer.c	(revision 293336)
@@ -1,1010 +1,1005 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_timer.c	8.2 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_tcpdebug.h"
 #include "opt_rss.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/rss_config.h>
 #include <net/vnet.h>
 #include <net/netisr.h>
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_rss.h>
 #include <netinet/in_systm.h>
 #ifdef INET6
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/ip_var.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #include <netinet/tcpip.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 
 int	tcp_keepinit;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
 
 int	tcp_keepidle;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
 
 int	tcp_keepintvl;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
 
 int	tcp_delacktime;
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
     "Time before a delayed ACK is sent");
 
 int	tcp_msl;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
 
 int	tcp_rexmit_min;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
     "Minimum Retransmission Timeout");
 
 int	tcp_rexmit_slop;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
     "Retransmission Timer Slop");
 
 static int	always_keepalive = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
 
 int    tcp_fast_finwait2_recycle = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 
     &tcp_fast_finwait2_recycle, 0,
     "Recycle closed FIN_WAIT_2 connections faster");
 
 int    tcp_finwait2_timeout;
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
 
 int	tcp_keepcnt = TCPTV_KEEPCNT;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
     "Number of keepalive probes to send");
 
 	/* max idle probes */
 int	tcp_maxpersistidle;
 
 static int	tcp_rexmit_drop_options = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
     &tcp_rexmit_drop_options, 0,
     "Drop TCP options from 3rd and later retransmitted SYN");
 
 static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
 #define	V_tcp_pmtud_blackhole_detect	VNET(tcp_pmtud_blackhole_detect)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
     CTLFLAG_RW|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
     "Path MTU Discovery Black Hole Detection Enabled");
 
 static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
 #define	V_tcp_pmtud_blackhole_activated \
     VNET(tcp_pmtud_blackhole_activated)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
     CTLFLAG_RD|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
     "Path MTU Discovery Black Hole Detection, Activation Count");
 
 static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
 #define	V_tcp_pmtud_blackhole_activated_min_mss \
     VNET(tcp_pmtud_blackhole_activated_min_mss)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
     CTLFLAG_RD|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
     "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
 
 static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
 #define	V_tcp_pmtud_blackhole_failed	VNET(tcp_pmtud_blackhole_failed)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
     CTLFLAG_RD|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
     "Path MTU Discovery Black Hole Detection, Failure Count");
 
 #ifdef INET
 static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
 #define	V_tcp_pmtud_blackhole_mss	VNET(tcp_pmtud_blackhole_mss)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
     CTLFLAG_RW|CTLFLAG_VNET,
     &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
     "Path MTU Discovery Black Hole Detection lowered MSS");
 #endif
 
 #ifdef INET6
 static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
 #define	V_tcp_v6pmtud_blackhole_mss	VNET(tcp_v6pmtud_blackhole_mss)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
     CTLFLAG_RW|CTLFLAG_VNET,
     &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
     "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
 #endif
 
 #ifdef	RSS
 static int	per_cpu_timers = 1;
 #else
 static int	per_cpu_timers = 0;
 #endif
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
     &per_cpu_timers , 0, "run tcp timers on all cpus");
 
 #if 0
 #define	INP_CPU(inp)	(per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
 		((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
 #endif
 
 /*
  * Map the given inp to a CPU id.
  *
  * This queries RSS if it's compiled in, else it defaults to the current
  * CPU ID.
  */
 static inline int
 inp_to_cpuid(struct inpcb *inp)
 {
 	u_int cpuid;
 
 #ifdef	RSS
 	if (per_cpu_timers) {
 		cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
 		if (cpuid == NETISR_CPUID_NONE)
 			return (curcpu);	/* XXX */
 		else
 			return (cpuid);
 	}
 #else
 	/* Legacy, pre-RSS behaviour */
 	if (per_cpu_timers) {
 		/*
 		 * We don't have a flowid -> cpuid mapping, so cheat and
 		 * just map unknown cpuids to curcpu.  Not the best, but
 		 * apparently better than defaulting to swi 0.
 		 */
 		cpuid = inp->inp_flowid % (mp_maxid + 1);
 		if (! CPU_ABSENT(cpuid))
 			return (cpuid);
 		return (curcpu);
 	}
 #endif
 	/* Default for RSS and non-RSS - cpuid 0 */
 	else {
 		return (0);
 	}
 }
 
 /*
  * Tcp protocol timeout routine called every 500 ms.
  * Updates timestamps used for TCP
  * causes finite state machine actions if timers expire.
  */
 void
 tcp_slowtimo(void)
 {
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		(void) tcp_tw_2msl_scan(0);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
 
 int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
 
 static int tcp_totbackoff = 2559;	/* sum of tcp_backoff[] */
 
 /*
  * TCP timer processing.
  */
 
 void
 tcp_timer_delack(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_delack) ||
 	    !callout_active(&tp->t_timers->tt_delack)) {
 		INP_WUNLOCK(inp);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_delack);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
 		CURVNET_RESTORE();
 		return;
 	}
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
 	KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0,
 		("%s: tp %p delack callout should be running", __func__, tp));
 
 	tp->t_flags |= TF_ACKNOW;
 	TCPSTAT_INC(tcps_delack);
 	(void) tp->t_fb->tfb_tcp_output(tp);
 	INP_WUNLOCK(inp);
 	CURVNET_RESTORE();
 }
 
 void
 tcp_timer_2msl(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	tcp_free_sackholes(tp);
 	if (callout_pending(&tp->t_timers->tt_2msl) ||
 	    !callout_active(&tp->t_timers->tt_2msl)) {
 		INP_WUNLOCK(tp->t_inpcb);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_2msl);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
 	KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0,
 		("%s: tp %p 2msl callout should be running", __func__, tp));
 	/*
 	 * 2 MSL timeout in shutdown went off.  If we're closed but
 	 * still waiting for peer to close and connection has been idle
 	 * too long delete connection control block.  Otherwise, check
 	 * again in a bit.
 	 *
 	 * If in TIME_WAIT state just ignore as this timeout is handled in
 	 * tcp_tw_2msl_scan().
 	 *
 	 * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 
 	 * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 
 	 * Ignore fact that there were recent incoming segments.
 	 */
 	if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
 	    tp->t_inpcb && tp->t_inpcb->inp_socket && 
 	    (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
 		TCPSTAT_INC(tcps_finwait2_drops);
 		tp = tcp_close(tp);             
 	} else {
 		if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
 			if (!callout_reset(&tp->t_timers->tt_2msl,
 			   TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
 				tp->t_timers->tt_flags &= ~TT_2MSL_RST;
 			}
 		} else
 		       tp = tcp_close(tp);
        }
 
 #ifdef TCPDEBUG
 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 void
 tcp_timer_keep(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct tcptemp *t_template;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_keep) ||
 	    !callout_active(&tp->t_timers->tt_keep)) {
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_keep);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
 	KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0,
 		("%s: tp %p keep callout should be running", __func__, tp));
 	/*
 	 * Keep-alive timer went off; send something
 	 * or drop connection if idle for too long.
 	 */
 	TCPSTAT_INC(tcps_keeptimeo);
 	if (tp->t_state < TCPS_ESTABLISHED)
 		goto dropit;
 	if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
 	    tp->t_state <= TCPS_CLOSING) {
 		if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
 			goto dropit;
 		/*
 		 * Send a packet designed to force a response
 		 * if the peer is up and reachable:
 		 * either an ACK if the connection is still alive,
 		 * or an RST if the peer has closed the connection
 		 * due to timeout or reboot.
 		 * Using sequence number tp->snd_una-1
 		 * causes the transmitted zero-length segment
 		 * to lie outside the receive window;
 		 * by the protocol spec, this requires the
 		 * correspondent TCP to respond.
 		 */
 		TCPSTAT_INC(tcps_keepprobe);
 		t_template = tcpip_maketemplate(inp);
 		if (t_template) {
 			tcp_respond(tp, t_template->tt_ipgen,
 				    &t_template->tt_t, (struct mbuf *)NULL,
 				    tp->rcv_nxt, tp->snd_una - 1, 0);
 			free(t_template, M_TEMP);
 		}
 		if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
 		    tcp_timer_keep, tp)) {
 			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
 		}
 	} else if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
 		    tcp_timer_keep, tp)) {
 			tp->t_timers->tt_flags &= ~TT_KEEP_RST;
 		}
 
 #ifdef TCPDEBUG
 	if (inp->inp_socket->so_options & SO_DEBUG)
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 	return;
 
 dropit:
 	TCPSTAT_INC(tcps_keepdrops);
 	tp = tcp_drop(tp, ETIMEDOUT);
 
 #ifdef TCPDEBUG
 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	if (tp != NULL)
 		INP_WUNLOCK(tp->t_inpcb);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 void
 tcp_timer_persist(void *xtp)
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_persist) ||
 	    !callout_active(&tp->t_timers->tt_persist)) {
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_persist);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
 	KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0,
 		("%s: tp %p persist callout should be running", __func__, tp));
 	/*
 	 * Persistance timer into zero window.
 	 * Force a byte to be output, if possible.
 	 */
 	TCPSTAT_INC(tcps_persisttimeo);
 	/*
 	 * Hack: if the peer is dead/unreachable, we do not
 	 * time out if the window is closed.  After a full
 	 * backoff, drop the connection if the idle time
 	 * (no responses to probes) reaches the maximum
 	 * backoff that we would use if retransmitting.
 	 */
 	if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
 	    (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
 	     ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
 		TCPSTAT_INC(tcps_persistdrop);
 		tp = tcp_drop(tp, ETIMEDOUT);
 		goto out;
 	}
 	/*
 	 * If the user has closed the socket then drop a persisting
 	 * connection after a much reduced timeout.
 	 */
 	if (tp->t_state > TCPS_CLOSE_WAIT &&
 	    (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
 		TCPSTAT_INC(tcps_persistdrop);
 		tp = tcp_drop(tp, ETIMEDOUT);
 		goto out;
 	}
 	tcp_setpersist(tp);
 	tp->t_flags |= TF_FORCEDATA;
 	(void) tp->t_fb->tfb_tcp_output(tp);
 	tp->t_flags &= ~TF_FORCEDATA;
 
 out:
 #ifdef TCPDEBUG
 	if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
 		tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 void
 tcp_timer_rexmt(void * xtp)
 {
 	struct tcpcb *tp = xtp;
 	CURVNET_SET(tp->t_vnet);
 	int rexmt;
 	int headlocked;
 	struct inpcb *inp;
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_rexmt) ||
 	    !callout_active(&tp->t_timers->tt_rexmt)) {
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_rexmt);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
 		("%s: tp %p tcpcb can't be stopped here", __func__, tp));
 	KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0,
 		("%s: tp %p rexmt callout should be running", __func__, tp));
 	tcp_free_sackholes(tp);
 	/*
 	 * Retransmission timer went off.  Message has not
 	 * been acked within retransmit interval.  Back off
 	 * to a longer retransmit interval and retransmit one segment.
 	 */
 	if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
 		tp->t_rxtshift = TCP_MAXRXTSHIFT;
 		TCPSTAT_INC(tcps_timeoutdrop);
 
 		tp = tcp_drop(tp, tp->t_softerror ?
 			      tp->t_softerror : ETIMEDOUT);
 		headlocked = 1;
 		goto out;
 	}
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	headlocked = 0;
 	if (tp->t_state == TCPS_SYN_SENT) {
 		/*
 		 * If the SYN was retransmitted, indicate CWND to be
 		 * limited to 1 segment in cc_conn_init().
 		 */
 		tp->snd_cwnd = 1;
 	} else if (tp->t_rxtshift == 1) {
 		/*
 		 * first retransmit; record ssthresh and cwnd so they can
 		 * be recovered if this turns out to be a "bad" retransmit.
 		 * A retransmit is considered "bad" if an ACK for this
 		 * segment is received within RTT/2 interval; the assumption
 		 * here is that the ACK was already in flight.  See
 		 * "On Estimating End-to-End Network Path Properties" by
 		 * Allman and Paxson for more details.
 		 */
 		tp->snd_cwnd_prev = tp->snd_cwnd;
 		tp->snd_ssthresh_prev = tp->snd_ssthresh;
 		tp->snd_recover_prev = tp->snd_recover;
 		if (IN_FASTRECOVERY(tp->t_flags))
 			tp->t_flags |= TF_WASFRECOVERY;
 		else
 			tp->t_flags &= ~TF_WASFRECOVERY;
 		if (IN_CONGRECOVERY(tp->t_flags))
 			tp->t_flags |= TF_WASCRECOVERY;
 		else
 			tp->t_flags &= ~TF_WASCRECOVERY;
 		tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
 		tp->t_flags |= TF_PREVVALID;
 	} else
 		tp->t_flags &= ~TF_PREVVALID;
 	TCPSTAT_INC(tcps_rexmttimeo);
 	if ((tp->t_state == TCPS_SYN_SENT) ||
 	    (tp->t_state == TCPS_SYN_RECEIVED))
 		rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
 	else
 		rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
 	TCPT_RANGESET(tp->t_rxtcur, rexmt,
 		      tp->t_rttmin, TCPTV_REXMTMAX);
 
 	/*
 	 * We enter the path for PLMTUD if connection is established or, if
 	 * connection is FIN_WAIT_1 status, reason for the last is that if
 	 * amount of data we send is very small, we could send it in couple of
 	 * packets and process straight to FIN. In that case we won't catch
 	 * ESTABLISHED state.
 	 */
 	if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
 	    || (tp->t_state == TCPS_FIN_WAIT_1))) {
-		int optlen;
 #ifdef INET6
 		int isipv6;
 #endif
 
 		/*
 		 * Idea here is that at each stage of mtu probe (usually, 1448
 		 * -> 1188 -> 524) should be given 2 chances to recover before
 		 *  further clamping down. 'tp->t_rxtshift % 2 == 0' should
 		 *  take care of that.
 		 */
 		if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
 		    (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
 		    (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) {
 			/*
 			 * Enter Path MTU Black-hole Detection mechanism:
 			 * - Disable Path MTU Discovery (IP "DF" bit).
 			 * - Reduce MTU to lower value than what we
 			 *   negotiated with peer.
 			 */
 			/* Record that we may have found a black hole. */
 			tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
 
 			/* Keep track of previous MSS. */
-			optlen = tp->t_maxopd - tp->t_maxseg;
-			tp->t_pmtud_saved_maxopd = tp->t_maxopd;
+			tp->t_pmtud_saved_maxseg = tp->t_maxseg;
 
 			/* 
 			 * Reduce the MSS to blackhole value or to the default
 			 * in an attempt to retransmit.
 			 */
 #ifdef INET6
 			isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
 			if (isipv6 &&
-			    tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
+			    tp->t_maxseg > V_tcp_v6pmtud_blackhole_mss) {
 				/* Use the sysctl tuneable blackhole MSS. */
-				tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
+				tp->t_maxseg = V_tcp_v6pmtud_blackhole_mss;
 				V_tcp_pmtud_blackhole_activated++;
 			} else if (isipv6) {
 				/* Use the default MSS. */
-				tp->t_maxopd = V_tcp_v6mssdflt;
+				tp->t_maxseg = V_tcp_v6mssdflt;
 				/*
 				 * Disable Path MTU Discovery when we switch to
 				 * minmss.
 				 */
 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 				V_tcp_pmtud_blackhole_activated_min_mss++;
 			}
 #endif
 #if defined(INET6) && defined(INET)
 			else
 #endif
 #ifdef INET
-			if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
+			if (tp->t_maxseg > V_tcp_pmtud_blackhole_mss) {
 				/* Use the sysctl tuneable blackhole MSS. */
-				tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
+				tp->t_maxseg = V_tcp_pmtud_blackhole_mss;
 				V_tcp_pmtud_blackhole_activated++;
 			} else {
 				/* Use the default MSS. */
-				tp->t_maxopd = V_tcp_mssdflt;
+				tp->t_maxseg = V_tcp_mssdflt;
 				/*
 				 * Disable Path MTU Discovery when we switch to
 				 * minmss.
 				 */
 				tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
 				V_tcp_pmtud_blackhole_activated_min_mss++;
 			}
 #endif
-			tp->t_maxseg = tp->t_maxopd - optlen;
 			/*
 			 * Reset the slow-start flight size
 			 * as it may depend on the new MSS.
 			 */
 			if (CC_ALGO(tp)->conn_init != NULL)
 				CC_ALGO(tp)->conn_init(tp->ccv);
 		} else {
 			/*
 			 * If further retransmissions are still unsuccessful
 			 * with a lowered MTU, maybe this isn't a blackhole and
 			 * we restore the previous MSS and blackhole detection
 			 * flags.
 			 * The limit '6' is determined by giving each probe
 			 * stage (1448, 1188, 524) 2 chances to recover.
 			 */
 			if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
 			    (tp->t_rxtshift > 6)) {
 				tp->t_flags2 |= TF2_PLPMTU_PMTUD;
 				tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
-				optlen = tp->t_maxopd - tp->t_maxseg;
-				tp->t_maxopd = tp->t_pmtud_saved_maxopd;
-				tp->t_maxseg = tp->t_maxopd - optlen;
+				tp->t_maxseg = tp->t_pmtud_saved_maxseg;
 				V_tcp_pmtud_blackhole_failed++;
 				/*
 				 * Reset the slow-start flight size as it
 				 * may depend on the new MSS.
 				 */
 				if (CC_ALGO(tp)->conn_init != NULL)
 					CC_ALGO(tp)->conn_init(tp->ccv);
 			}
 		}
 	}
 
 	/*
 	 * Disable RFC1323 and SACK if we haven't got any response to
 	 * our third SYN to work-around some broken terminal servers
 	 * (most of which have hopefully been retired) that have bad VJ
 	 * header compression code which trashes TCP segments containing
 	 * unknown-to-them TCP options.
 	 */
 	if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
 	    (tp->t_rxtshift == 3))
 		tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
 	/*
 	 * If we backed off this far, our srtt estimate is probably bogus.
 	 * Clobber it so we'll take the next rtt measurement as our srtt;
 	 * move the current srtt into rttvar to keep the current
 	 * retransmit times until then.
 	 */
 	if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
 #ifdef INET6
 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
 			in6_losing(tp->t_inpcb);
 #endif
 		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
 		tp->t_srtt = 0;
 	}
 	tp->snd_nxt = tp->snd_una;
 	tp->snd_recover = tp->snd_max;
 	/*
 	 * Force a segment to be sent.
 	 */
 	tp->t_flags |= TF_ACKNOW;
 	/*
 	 * If timing a segment in this window, stop the timer.
 	 */
 	tp->t_rtttime = 0;
 
 	cc_cong_signal(tp, NULL, CC_RTO);
 
 	(void) tp->t_fb->tfb_tcp_output(tp);
 
 out:
 #ifdef TCPDEBUG
 	if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 			  PRU_SLOWTIMO);
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
 	if (headlocked)
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 void
 tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
 {
 	struct callout *t_callout;
 	timeout_t *f_callout;
 	struct inpcb *inp = tp->t_inpcb;
 	int cpu = inp_to_cpuid(inp);
 	uint32_t f_reset;
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_flags & TF_TOE)
 		return;
 #endif
 
 	if (tp->t_timers->tt_flags & TT_STOPPED)
 		return;
 
 	switch (timer_type) {
 		case TT_DELACK:
 			t_callout = &tp->t_timers->tt_delack;
 			f_callout = tcp_timer_delack;
 			f_reset = TT_DELACK_RST;
 			break;
 		case TT_REXMT:
 			t_callout = &tp->t_timers->tt_rexmt;
 			f_callout = tcp_timer_rexmt;
 			f_reset = TT_REXMT_RST;
 			break;
 		case TT_PERSIST:
 			t_callout = &tp->t_timers->tt_persist;
 			f_callout = tcp_timer_persist;
 			f_reset = TT_PERSIST_RST;
 			break;
 		case TT_KEEP:
 			t_callout = &tp->t_timers->tt_keep;
 			f_callout = tcp_timer_keep;
 			f_reset = TT_KEEP_RST;
 			break;
 		case TT_2MSL:
 			t_callout = &tp->t_timers->tt_2msl;
 			f_callout = tcp_timer_2msl;
 			f_reset = TT_2MSL_RST;
 			break;
 		default:
 			if (tp->t_fb->tfb_tcp_timer_activate) {
 				tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
 				return;
 			}
 			panic("tp %p bad timer_type %#x", tp, timer_type);
 		}
 	if (delta == 0) {
 		if ((tp->t_timers->tt_flags & timer_type) &&
 		    (callout_stop(t_callout) > 0) &&
 		    (tp->t_timers->tt_flags & f_reset)) {
 			tp->t_timers->tt_flags &= ~(timer_type | f_reset);
 		}
 	} else {
 		if ((tp->t_timers->tt_flags & timer_type) == 0) {
 			tp->t_timers->tt_flags |= (timer_type | f_reset);
 			callout_reset_on(t_callout, delta, f_callout, tp, cpu);
 		} else {
 			/* Reset already running callout on the same CPU. */
 			if (!callout_reset(t_callout, delta, f_callout, tp)) {
 				/*
 				 * Callout not cancelled, consider it as not
 				 * properly restarted. */
 				tp->t_timers->tt_flags &= ~f_reset;
 			}
 		}
 	}
 }
 
 int
 tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
 {
 	struct callout *t_callout;
 
 	switch (timer_type) {
 		case TT_DELACK:
 			t_callout = &tp->t_timers->tt_delack;
 			break;
 		case TT_REXMT:
 			t_callout = &tp->t_timers->tt_rexmt;
 			break;
 		case TT_PERSIST:
 			t_callout = &tp->t_timers->tt_persist;
 			break;
 		case TT_KEEP:
 			t_callout = &tp->t_timers->tt_keep;
 			break;
 		case TT_2MSL:
 			t_callout = &tp->t_timers->tt_2msl;
 			break;
 		default:
 			if (tp->t_fb->tfb_tcp_timer_active) {
 				return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
 			}
 			panic("tp %p bad timer_type %#x", tp, timer_type);
 		}
 	return callout_active(t_callout);
 }
 
 void
 tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
 {
 	struct callout *t_callout;
 	timeout_t *f_callout;
 	uint32_t f_reset;
 
 	tp->t_timers->tt_flags |= TT_STOPPED;
 
 	switch (timer_type) {
 		case TT_DELACK:
 			t_callout = &tp->t_timers->tt_delack;
 			f_callout = tcp_timer_delack_discard;
 			f_reset = TT_DELACK_RST;
 			break;
 		case TT_REXMT:
 			t_callout = &tp->t_timers->tt_rexmt;
 			f_callout = tcp_timer_rexmt_discard;
 			f_reset = TT_REXMT_RST;
 			break;
 		case TT_PERSIST:
 			t_callout = &tp->t_timers->tt_persist;
 			f_callout = tcp_timer_persist_discard;
 			f_reset = TT_PERSIST_RST;
 			break;
 		case TT_KEEP:
 			t_callout = &tp->t_timers->tt_keep;
 			f_callout = tcp_timer_keep_discard;
 			f_reset = TT_KEEP_RST;
 			break;
 		case TT_2MSL:
 			t_callout = &tp->t_timers->tt_2msl;
 			f_callout = tcp_timer_2msl_discard;
 			f_reset = TT_2MSL_RST;
 			break;
 		default:
 			if (tp->t_fb->tfb_tcp_timer_stop) {
 				/* 
 				 * XXXrrs we need to look at this with the
 				 * stop case below (flags).
 				 */
 				tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
 				return;
 			}
 			panic("tp %p bad timer_type %#x", tp, timer_type);
 		}
 
 	if (tp->t_timers->tt_flags & timer_type) {
 		if ((callout_stop(t_callout) > 0) &&
 		    (tp->t_timers->tt_flags & f_reset)) {
 			tp->t_timers->tt_flags &= ~(timer_type | f_reset);
 		} else {
 			/*
 			 * Can't stop the callout, defer tcpcb actual deletion
 			 * to the last tcp timer discard callout.
 			 * The TT_STOPPED flag will ensure that no tcp timer
 			 * callouts can be restarted on our behalf, and
 			 * past this point currently running callouts waiting
 			 * on inp lock will return right away after the
 			 * classical check for callout reset/stop events:
 			 * callout_pending() || !callout_active()
 			 */
 			callout_reset(t_callout, 1, f_callout, tp);
 		}
 	}
 }
 
 #define	ticks_to_msecs(t)	(1000*(t) / hz)
 
 void
 tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
     struct xtcp_timer *xtimer)
 {
 	sbintime_t now;
 
 	bzero(xtimer, sizeof(*xtimer));
 	if (timer == NULL)
 		return;
 	now = getsbinuptime();
 	if (callout_active(&timer->tt_delack))
 		xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
 	if (callout_active(&timer->tt_rexmt))
 		xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
 	if (callout_active(&timer->tt_persist))
 		xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
 	if (callout_active(&timer->tt_keep))
 		xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
 	if (callout_active(&timer->tt_2msl))
 		xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
 	xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
 }
Index: projects/release-pkg/sys/netinet/tcp_usrreq.c
===================================================================
--- projects/release-pkg/sys/netinet/tcp_usrreq.c	(revision 293335)
+++ projects/release-pkg/sys/netinet/tcp_usrreq.c	(revision 293336)
@@ -1,2280 +1,2278 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2006-2007 Robert N. M. Watson
  * Copyright (c) 2010-2011 Juniper Networks, Inc.
  * All rights reserved.
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to Juniper Networks, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/refcount.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/mbuf.h>
 #ifdef INET6
 #include <sys/domain.h>
 #endif /* INET6 */
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/proc.h>
 #include <sys/jail.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
 #ifdef TCP_RFC7413
 #include <netinet/tcp_fastopen.h>
 #endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcpip.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 
 /*
  * TCP protocol interface to socket abstraction.
  */
 static int	tcp_attach(struct socket *);
 #ifdef INET
 static int	tcp_connect(struct tcpcb *, struct sockaddr *,
 		    struct thread *td);
 #endif /* INET */
 #ifdef INET6
 static int	tcp6_connect(struct tcpcb *, struct sockaddr *,
 		    struct thread *td);
 #endif /* INET6 */
 static void	tcp_disconnect(struct tcpcb *);
 static void	tcp_usrclosed(struct tcpcb *);
 static void	tcp_fill_info(struct tcpcb *, struct tcp_info *);
 
 #ifdef TCPDEBUG
 #define	TCPDEBUG0	int ostate = 0
 #define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
 #define	TCPDEBUG2(req)	if (tp && (so->so_options & SO_DEBUG)) \
 				tcp_trace(TA_USER, ostate, tp, 0, 0, req)
 #else
 #define	TCPDEBUG0
 #define	TCPDEBUG1()
 #define	TCPDEBUG2(req)
 #endif
 
 /*
  * TCP attaches to socket via pru_attach(), reserving space,
  * and an internet control block.
  */
 static int
 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 	int error;
 	TCPDEBUG0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
 	TCPDEBUG1();
 
 	error = tcp_attach(so);
 	if (error)
 		goto out;
 
 	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
 		so->so_linger = TCP_LINGERTIME;
 
 	inp = sotoinpcb(so);
 	tp = intotcpcb(inp);
 out:
 	TCPDEBUG2(PRU_ATTACH);
 	TCP_PROBE2(debug__user, tp, PRU_ATTACH);
 	return error;
 }
 
 /*
  * tcp_detach is called when the socket layer loses its final reference
  * to the socket, be it a file descriptor reference, a reference from TCP,
  * etc.  At this point, there is only one case in which we will keep around
  * inpcb state: time wait.
  *
  * This function can probably be re-absorbed back into tcp_usr_detach() now
  * that there is a single detach path.
  */
 static void
 tcp_detach(struct socket *so, struct inpcb *inp)
 {
 	struct tcpcb *tp;
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
 	KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
 
 	tp = intotcpcb(inp);
 
 	if (inp->inp_flags & INP_TIMEWAIT) {
 		/*
 		 * There are two cases to handle: one in which the time wait
 		 * state is being discarded (INP_DROPPED), and one in which
 		 * this connection will remain in timewait.  In the former,
 		 * it is time to discard all state (except tcptw, which has
 		 * already been discarded by the timewait close code, which
 		 * should be further up the call stack somewhere).  In the
 		 * latter case, we detach from the socket, but leave the pcb
 		 * present until timewait ends.
 		 *
 		 * XXXRW: Would it be cleaner to free the tcptw here?
 		 *
 		 * Astute question indeed, from twtcp perspective there are
 		 * three cases to consider:
 		 *
 		 * #1 tcp_detach is called at tcptw creation time by
 		 *  tcp_twstart, then do not discard the newly created tcptw
 		 *  and leave inpcb present until timewait ends
 		 * #2 tcp_detach is called at timewait end (or reuse) by
 		 *  tcp_twclose, then the tcptw has already been discarded
 		 *  (or reused) and inpcb is freed here
 		 * #3 tcp_detach is called() after timewait ends (or reuse)
 		 *  (e.g. by soclose), then tcptw has already been discarded
 		 *  (or reused) and inpcb is freed here
 		 *
 		 *  In all three cases the tcptw should not be freed here.
 		 */
 		if (inp->inp_flags & INP_DROPPED) {
 			KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
 			    "INP_DROPPED && tp != NULL"));
 			in_pcbdetach(inp);
 			in_pcbfree(inp);
 		} else {
 			in_pcbdetach(inp);
 			INP_WUNLOCK(inp);
 		}
 	} else {
 		/*
 		 * If the connection is not in timewait, we consider two
 		 * two conditions: one in which no further processing is
 		 * necessary (dropped || embryonic), and one in which TCP is
 		 * not yet done, but no longer requires the socket, so the
 		 * pcb will persist for the time being.
 		 *
 		 * XXXRW: Does the second case still occur?
 		 */
 		if (inp->inp_flags & INP_DROPPED ||
 		    tp->t_state < TCPS_SYN_SENT) {
 			tcp_discardcb(tp);
 			in_pcbdetach(inp);
 			in_pcbfree(inp);
 		} else {
 			in_pcbdetach(inp);
 			INP_WUNLOCK(inp);
 		}
 	}
 }
 
 /*
  * pru_detach() detaches the TCP protocol from the socket.
  * If the protocol state is non-embryonic, then can't
  * do this directly: have to initiate a pru_disconnect(),
  * which may finish later; embryonic TCB's can just
  * be discarded here.
  */
 static void
 tcp_usr_detach(struct socket *so)
 {
 	struct inpcb *inp;
 	int rlock = 0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
 	if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
 		INP_INFO_RLOCK(&V_tcbinfo);
 		rlock = 1;
 	}
 	INP_WLOCK(inp);
 	KASSERT(inp->inp_socket != NULL,
 	    ("tcp_usr_detach: inp_socket == NULL"));
 	tcp_detach(so, inp);
 	if (rlock)
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 
 #ifdef INET
 /*
  * Give the socket an address.
  */
 static int
 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 	struct sockaddr_in *sinp;
 
 	sinp = (struct sockaddr_in *)nam;
 	if (nam->sa_len != sizeof (*sinp))
 		return (EINVAL);
 	/*
 	 * Must check for multicast addresses and disallow binding
 	 * to them.
 	 */
 	if (sinp->sin_family == AF_INET &&
 	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
 		return (EAFNOSUPPORT);
 
 	TCPDEBUG0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		error = EINVAL;
 		goto out;
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 	INP_HASH_WLOCK(&V_tcbinfo);
 	error = in_pcbbind(inp, nam, td->td_ucred);
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 out:
 	TCPDEBUG2(PRU_BIND);
 	TCP_PROBE2(debug__user, tp, PRU_BIND);
 	INP_WUNLOCK(inp);
 
 	return (error);
 }
 #endif /* INET */
 
 #ifdef INET6
 static int
 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 	struct sockaddr_in6 *sin6p;
 
 	sin6p = (struct sockaddr_in6 *)nam;
 	if (nam->sa_len != sizeof (*sin6p))
 		return (EINVAL);
 	/*
 	 * Must check for multicast addresses and disallow binding
 	 * to them.
 	 */
 	if (sin6p->sin6_family == AF_INET6 &&
 	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
 		return (EAFNOSUPPORT);
 
 	TCPDEBUG0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		error = EINVAL;
 		goto out;
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 	INP_HASH_WLOCK(&V_tcbinfo);
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 #ifdef INET
 	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
 			inp->inp_vflag |= INP_IPV4;
 		else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
 			struct sockaddr_in sin;
 
 			in6_sin6_2_sin(&sin, sin6p);
 			inp->inp_vflag |= INP_IPV4;
 			inp->inp_vflag &= ~INP_IPV6;
 			error = in_pcbbind(inp, (struct sockaddr *)&sin,
 			    td->td_ucred);
 			INP_HASH_WUNLOCK(&V_tcbinfo);
 			goto out;
 		}
 	}
 #endif
 	error = in6_pcbbind(inp, nam, td->td_ucred);
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 out:
 	TCPDEBUG2(PRU_BIND);
 	TCP_PROBE2(debug__user, tp, PRU_BIND);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 #endif /* INET6 */
 
 #ifdef INET
 /*
  * Prepare to accept connections.
  */
 static int
 tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 
 	TCPDEBUG0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		error = EINVAL;
 		goto out;
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	INP_HASH_WLOCK(&V_tcbinfo);
 	if (error == 0 && inp->inp_lport == 0)
 		error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	if (error == 0) {
 		tcp_state_change(tp, TCPS_LISTEN);
 		solisten_proto(so, backlog);
 #ifdef TCP_OFFLOAD
 		if ((so->so_options & SO_NO_OFFLOAD) == 0)
 			tcp_offload_listen_start(tp);
 #endif
 	}
 	SOCK_UNLOCK(so);
 
 #ifdef TCP_RFC7413
 	if (tp->t_flags & TF_FASTOPEN)
 		tp->t_tfo_pending = tcp_fastopen_alloc_counter();
 #endif
 out:
 	TCPDEBUG2(PRU_LISTEN);
 	TCP_PROBE2(debug__user, tp, PRU_LISTEN);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 #endif /* INET */
 
 #ifdef INET6
 static int
 tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 
 	TCPDEBUG0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		error = EINVAL;
 		goto out;
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	INP_HASH_WLOCK(&V_tcbinfo);
 	if (error == 0 && inp->inp_lport == 0) {
 		inp->inp_vflag &= ~INP_IPV4;
 		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
 			inp->inp_vflag |= INP_IPV4;
 		error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 	}
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	if (error == 0) {
 		tcp_state_change(tp, TCPS_LISTEN);
 		solisten_proto(so, backlog);
 #ifdef TCP_OFFLOAD
 		if ((so->so_options & SO_NO_OFFLOAD) == 0)
 			tcp_offload_listen_start(tp);
 #endif
 	}
 	SOCK_UNLOCK(so);
 
 #ifdef TCP_RFC7413
 	if (tp->t_flags & TF_FASTOPEN)
 		tp->t_tfo_pending = tcp_fastopen_alloc_counter();
 #endif
 out:
 	TCPDEBUG2(PRU_LISTEN);
 	TCP_PROBE2(debug__user, tp, PRU_LISTEN);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 #endif /* INET6 */
 
 #ifdef INET
 /*
  * Initiate connection to peer.
  * Create a template for use in transmissions on this connection.
  * Enter SYN_SENT state, and mark socket as connecting.
  * Start keep-alive timer, and seed output sequence space.
  * Send initial segment on connection.
  */
 static int
 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 	struct sockaddr_in *sinp;
 
 	sinp = (struct sockaddr_in *)nam;
 	if (nam->sa_len != sizeof (*sinp))
 		return (EINVAL);
 	/*
 	 * Must disallow TCP ``connections'' to multicast addresses.
 	 */
 	if (sinp->sin_family == AF_INET
 	    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
 		return (EAFNOSUPPORT);
 	if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
 		return (error);
 
 	TCPDEBUG0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_TIMEWAIT) {
 		error = EADDRINUSE;
 		goto out;
 	}
 	if (inp->inp_flags & INP_DROPPED) {
 		error = ECONNREFUSED;
 		goto out;
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 	if ((error = tcp_connect(tp, nam, td)) != 0)
 		goto out;
 #ifdef TCP_OFFLOAD
 	if (registered_toedevs > 0 &&
 	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
 	    (error = tcp_offload_connect(so, nam)) == 0)
 		goto out;
 #endif
 	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 	error = tp->t_fb->tfb_tcp_output(tp);
 out:
 	TCPDEBUG2(PRU_CONNECT);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Initiate an active open on a TCP socket using an IPv6 sockaddr.
  *
  * A sockaddr of the wrong length yields EINVAL and an AF_INET6 multicast
  * destination yields EAFNOSUPPORT, both before the inpcb is locked.  With
  * the inpcb write lock held, an inpcb in INP_TIMEWAIT yields EADDRINUSE and
  * one that is INP_DROPPED yields ECONNREFUSED.  When INET is compiled in and
  * the destination is V4-mapped, the address is converted to a sockaddr_in
  * and the connection is set up with tcp_connect(); otherwise tcp6_connect()
  * is used.  Unless tcp_offload_connect() returns 0, the stack's
  * tfb_tcp_output method is then called and its result is returned.
  */
 static int
 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 	struct sockaddr_in6 *sin6p;
 
 	TCPDEBUG0;
 
 	sin6p = (struct sockaddr_in6 *)nam;
 	if (nam->sa_len != sizeof (*sin6p))
 		return (EINVAL);
 	/*
 	 * Must disallow TCP ``connections'' to multicast addresses.
 	 */
 	if (sin6p->sin6_family == AF_INET6
 	    && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
 		return (EAFNOSUPPORT);
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & INP_TIMEWAIT) {
 		error = EADDRINUSE;
 		goto out;
 	}
 	if (inp->inp_flags & INP_DROPPED) {
 		error = ECONNREFUSED;
 		goto out;
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 #ifdef INET
 	/*
 	 * XXXRW: Some confusion: V4/V6 flags relate to binding, and
 	 * therefore probably require the hash lock, which isn't held here.
 	 * Is this a significant problem?
 	 */
 	if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
 		struct sockaddr_in sin;
 
 		/* A V6-only socket may not connect to a V4-mapped address. */
 		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
 			error = EINVAL;
 			goto out;
 		}
 
 		/*
 		 * Convert to a sockaddr_in and flip the inpcb to IPv4.
 		 * NOTE(review): the vflag change happens before the jail
 		 * check and tcp_connect() and is not undone if they fail --
 		 * confirm this is intended.
 		 */
 		in6_sin6_2_sin(&sin, sin6p);
 		inp->inp_vflag |= INP_IPV4;
 		inp->inp_vflag &= ~INP_IPV6;
 		if ((error = prison_remote_ip4(td->td_ucred,
 		    &sin.sin_addr)) != 0)
 			goto out;
 		if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
 			goto out;
 #ifdef TCP_OFFLOAD
 		/* If the offload connect returns 0, skip the software path. */
 		if (registered_toedevs > 0 &&
 		    (so->so_options & SO_NO_OFFLOAD) == 0 &&
 		    (error = tcp_offload_connect(so, nam)) == 0)
 			goto out;
 #endif
 		/*
 		 * NOTE(review): unlike the native IPv6 path below, TT_KEEP
 		 * is not armed here before output -- confirm whether that is
 		 * intentional.
 		 */
 		error = tp->t_fb->tfb_tcp_output(tp);
 		goto out;
 	}
 #endif
 	/* Native IPv6 destination: mark the inpcb as IPv6-only state. */
 	inp->inp_vflag &= ~INP_IPV4;
 	inp->inp_vflag |= INP_IPV6;
 	inp->inp_inc.inc_flags |= INC_ISIPV6;
 	if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
 		goto out;
 	if ((error = tcp6_connect(tp, nam, td)) != 0)
 		goto out;
 #ifdef TCP_OFFLOAD
 	/* If the offload connect returns 0, skip the software path. */
 	if (registered_toedevs > 0 &&
 	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
 	    (error = tcp_offload_connect(so, nam)) == 0)
 		goto out;
 #endif
 	/* Arm the keep timer with the connection-establishment timeout. */
 	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 	error = tp->t_fb->tfb_tcp_output(tp);
 
 out:
 	/* tp is still NULL here if we bailed out before intotcpcb(). */
 	TCPDEBUG2(PRU_CONNECT);
 	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 #endif /* INET6 */
 
 /*
  * Start disconnecting from the peer on behalf of the user.
  *
  * The real work is delegated to tcp_disconnect(), which either drops the
  * connection outright (never established, or no need to let data drain)
  * or begins the orderly TCP shutdown: the socket is marked disconnecting,
  * unread data is drained, the state is advanced to reflect the user close
  * and a segment (e.g. FIN) goes to the peer.  The socket is really
  * disconnected only when the peer sends its FIN and acks ours.
  *
  * An inpcb already in INP_TIMEWAIT is left alone and 0 is returned; an
  * inpcb that is INP_DROPPED yields ECONNRESET.
  *
  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
  */
 static int
 tcp_usr_disconnect(struct socket *so)
 {
 	struct tcpcb *tp = NULL;
 	struct inpcb *inp;
 	int error = 0;
 
 	TCPDEBUG0;
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
 	INP_WLOCK(inp);
 	if ((inp->inp_flags & INP_TIMEWAIT) == 0) {
 		if ((inp->inp_flags & INP_DROPPED) != 0) {
 			error = ECONNRESET;
 		} else {
 			tp = intotcpcb(inp);
 			TCPDEBUG1();
 			tcp_disconnect(tp);
 		}
 	}
 	/* tp stays NULL on the TIMEWAIT and DROPPED paths. */
 	TCPDEBUG2(PRU_DISCONNECT);
 	TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (error);
 }
 
 #ifdef INET
 /*
  * Accept a connection on an IPv4 TCP socket.  The higher layers have done
  * essentially all the work already; what is left is to hand back the
  * peer's address through *nam.
  *
  * Returns ECONNABORTED if the socket is already disconnected or the inpcb
  * is in INP_TIMEWAIT or INP_DROPPED; *nam is only written on success.
  */
 static int
 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct tcpcb *tp = NULL;
 	struct inpcb *inp = NULL;
 	struct in_addr addr;
 	in_port_t port = 0;
 	int error = 0;
 	TCPDEBUG0;
 
 	if (so->so_state & SS_ISDISCONNECTED)
 		return (ECONNABORTED);
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
 	INP_WLOCK(inp);
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {
 		error = ECONNABORTED;
 	} else {
 		tp = intotcpcb(inp);
 		TCPDEBUG1();
 
 		/*
 		 * in_getpeeraddr and COMMON_END are open-coded here: snapshot
 		 * the foreign port and address under the lock so that the
 		 * sockaddr allocation can happen after the lock is released.
 		 */
 		port = inp->inp_fport;
 		addr = inp->inp_faddr;
 	}
 
 	TCPDEBUG2(PRU_ACCEPT);
 	TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
 	INP_WUNLOCK(inp);
 	if (error != 0)
 		return (error);
 	*nam = in_sockaddr(port, &addr);
 	return (0);
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Accept a connection on an IPv6 TCP socket and return the peer's address
  * through *nam.  If the inpcb carries INP_IPV4 the peer is reported as a
  * V4-mapped IPv6 address, otherwise as a native IPv6 address.
  *
  * Returns ECONNABORTED if the socket is already disconnected or the inpcb
  * is in INP_TIMEWAIT or INP_DROPPED; *nam is only written on success.
  */
 static int
 tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct tcpcb *tp = NULL;
 	struct inpcb *inp = NULL;
 	struct in6_addr addr6;
 	struct in_addr addr;
 	in_port_t port = 0;
 	int error = 0;
 	int is_v4 = 0;
 	TCPDEBUG0;
 
 	if (so->so_state & SS_ISDISCONNECTED)
 		return (ECONNABORTED);
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
 	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {
 		error = ECONNABORTED;
 	} else {
 		tp = intotcpcb(inp);
 		TCPDEBUG1();
 
 		/*
 		 * in6_mapped_peeraddr and COMMON_END are open-coded here:
 		 * snapshot the foreign port and address under the lock so
 		 * that the sockaddr allocation can happen after the lock is
 		 * released.
 		 */
 		port = inp->inp_fport;
 		if ((inp->inp_vflag & INP_IPV4) != 0) {
 			is_v4 = 1;
 			addr = inp->inp_faddr;
 		} else {
 			addr6 = inp->in6p_faddr;
 		}
 	}
 
 	TCPDEBUG2(PRU_ACCEPT);
 	TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	if (error != 0)
 		return (error);
 	if (is_v4)
 		*nam = in6_v4mapsin6_sockaddr(port, &addr);
 	else
 		*nam = in6_sockaddr(port, &addr6);
 	return (0);
 }
 #endif /* INET6 */
 
 /*
  * Shut down the send side: after this the connection can produce no
  * further output.  Returns ECONNRESET if the inpcb is in INP_TIMEWAIT or
  * INP_DROPPED, otherwise the result of the stack's output routine (or 0 if
  * tcp_usrclosed() left the inpcb dropped).
  */
 static int
 tcp_usr_shutdown(struct socket *so)
 {
 	struct tcpcb *tp = NULL;
 	struct inpcb *inp;
 	int error = 0;
 
 	TCPDEBUG0;
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("inp == NULL"));
 	INP_WLOCK(inp);
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {
 		error = ECONNRESET;
 	} else {
 		tp = intotcpcb(inp);
 		TCPDEBUG1();
 		socantsendmore(so);
 		tcp_usrclosed(tp);
 		/* Only transmit if the inpcb was not dropped meanwhile. */
 		if ((inp->inp_flags & INP_DROPPED) == 0)
 			error = tp->t_fb->tfb_tcp_output(tp);
 	}
 
 	TCPDEBUG2(PRU_SHUTDOWN);
 	TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	return (error);
 }
 
 /*
  * After a receive, possibly send window update to peer.
  *
  * Returns ECONNRESET if the inpcb is in INP_TIMEWAIT or INP_DROPPED and 0
  * otherwise; the return value of the output/offload call is not propagated.
  * The flags argument is not used.
  */
 static int
 tcp_usr_rcvd(struct socket *so, int flags)
 {
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 	int error = 0;
 
 	TCPDEBUG0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		error = ECONNRESET;
 		goto out;
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 #ifdef TCP_RFC7413
 	/*
 	 * For passively-created TFO connections, don't attempt a window
 	 * update while still in SYN_RECEIVED as this may trigger an early
 	 * SYN|ACK.  It is preferable to have the SYN|ACK be sent along with
 	 * application response data, or failing that, when the DELACK timer
 	 * expires.
 	 */
 	if ((tp->t_flags & TF_FASTOPEN) &&
 	    (tp->t_state == TCPS_SYN_RECEIVED))
 		goto out;
 #endif
 #ifdef TCP_OFFLOAD
 	/*
 	 * The "else" below binds to the tfb_tcp_output() call that follows
 	 * the #endif: offloaded connections notify the TOE instead of running
 	 * the software output path.
 	 */
 	if (tp->t_flags & TF_TOE)
 		tcp_offload_rcvd(tp);
 	else
 #endif
 	tp->t_fb->tfb_tcp_output(tp);
 
 out:
 	/* tp is still NULL here on the ECONNRESET path. */
 	TCPDEBUG2(PRU_RCVD);
 	TCP_PROBE2(debug__user, tp, PRU_RCVD);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 /*
  * Do a send by putting data in output queue and updating urgent
  * marker if URG set.  Possibly send more data.  Unlike the other
  * pru_*() routines, the mbuf chains are our responsibility.  We
  * must either enqueue them or free them.  The other pru_* routines
  * generally are caller-frees.
  *
  * so:      socket to send on.
  * flags:   PRUS_* flags; PRUS_OOB, PRUS_EOF, PRUS_MORETOCOME and
  *          PRUS_NOTREADY are examined here.
  * m:       data to append to the send buffer.
  * nam:     optional destination; when non-NULL and the connection has not
  *          reached SYN_SENT, an implied connect is performed.
  * control: optional control mbufs; always freed here, and rejected with
  *          EINVAL if the first mbuf has a non-zero length.
  * td:      calling thread, passed on to the connect routines.
  *
  * Returns 0 or an errno value (ECONNRESET, EINVAL, ENOBUFS, or whatever the
  * connect/output routines return).
  */
 static int
 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	int error = 0;
 	struct inpcb *inp;
 	struct tcpcb *tp = NULL;
 #ifdef INET6
 	int isipv6;
 #endif
 	TCPDEBUG0;
 
 	/*
 	 * We require the pcbinfo lock if we will close the socket as part of
 	 * this call.
 	 */
 	if (flags & PRUS_EOF)
 		INP_INFO_RLOCK(&V_tcbinfo);
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
 	INP_WLOCK(inp);
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		if (control)
 			m_freem(control);
 		/*
 		 * In case of PRUS_NOTREADY, tcp_usr_ready() is responsible
 		 * for freeing memory.
 		 */
 		if (m && (flags & PRUS_NOTREADY) == 0)
 			m_freem(m);
 		error = ECONNRESET;
 		goto out;
 	}
 #ifdef INET6
 	/* Selects tcp6_connect() over tcp_connect() for an implied connect. */
 	isipv6 = nam && nam->sa_family == AF_INET6;
 #endif /* INET6 */
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 	if (control) {
 		/* TCP doesn't do control messages (rights, creds, etc) */
 		if (control->m_len) {
 			m_freem(control);
 			if (m)
 				m_freem(m);
 			error = EINVAL;
 			goto out;
 		}
 		m_freem(control);	/* empty control, just free it */
 	}
 	if (!(flags & PRUS_OOB)) {
 		/* Normal data: queue it first, then connect/close/transmit. */
 		sbappendstream(&so->so_snd, m, flags);
 		if (nam && tp->t_state < TCPS_SYN_SENT) {
 			/*
 			 * Do implied connect if not yet connected,
 			 * initialize window to default value, and
-			 * initialize maxseg/maxopd using peer's cached
-			 * MSS.
+			 * initialize maxseg using peer's cached MSS.
 			 */
 #ifdef INET6
 			if (isipv6)
 				error = tcp6_connect(tp, nam, td);
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 			else
 #endif
 #ifdef INET
 				error = tcp_connect(tp, nam, td);
 #endif
 			if (error)
 				goto out;
 			tp->snd_wnd = TTCP_CLIENT_SND_WND;
 			tcp_mss(tp, -1);
 		}
 		if (flags & PRUS_EOF) {
 			/*
 			 * Close the send side of the connection after
 			 * the data is sent.
 			 */
 			INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 			socantsendmore(so);
 			tcp_usrclosed(tp);
 		}
 		/*
 		 * Skip output if the inpcb has been dropped or the data
 		 * was queued as not-yet-ready.  TF_MORETOCOME is set only for
 		 * the duration of the output call.
 		 */
 		if (!(inp->inp_flags & INP_DROPPED) &&
 		    !(flags & PRUS_NOTREADY)) {
 			if (flags & PRUS_MORETOCOME)
 				tp->t_flags |= TF_MORETOCOME;
 			error = tp->t_fb->tfb_tcp_output(tp);
 			if (flags & PRUS_MORETOCOME)
 				tp->t_flags &= ~TF_MORETOCOME;
 		}
 	} else {
 		/*
 		 * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
 		 */
 		SOCKBUF_LOCK(&so->so_snd);
 		/* Refuse (and free) the data once sbspace() is below -512. */
 		if (sbspace(&so->so_snd) < -512) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			m_freem(m);
 			error = ENOBUFS;
 			goto out;
 		}
 		/*
 		 * According to RFC961 (Assigned Protocols),
 		 * the urgent pointer points to the last octet
 		 * of urgent data.  We continue, however,
 		 * to consider it to indicate the first octet
 		 * of data past the urgent section.
 		 * Otherwise, snd_up should be one lower.
 		 */
 		sbappendstream_locked(&so->so_snd, m, flags);
 		SOCKBUF_UNLOCK(&so->so_snd);
 		if (nam && tp->t_state < TCPS_SYN_SENT) {
 			/*
 			 * Do implied connect if not yet connected,
 			 * initialize window to default value, and
-			 * initialize maxseg/maxopd using peer's cached
-			 * MSS.
+			 * initialize maxseg using peer's cached MSS.
 			 */
 #ifdef INET6
 			if (isipv6)
 				error = tcp6_connect(tp, nam, td);
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 			else
 #endif
 #ifdef INET
 				error = tcp_connect(tp, nam, td);
 #endif
 			if (error)
 				goto out;
 			tp->snd_wnd = TTCP_CLIENT_SND_WND;
 			tcp_mss(tp, -1);
 		}
 		/* Urgent pointer: just past everything now in the send buffer. */
 		tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
 		if (!(flags & PRUS_NOTREADY)) {
 			/* TF_FORCEDATA is set only across the output call. */
 			tp->t_flags |= TF_FORCEDATA;
 			error = tp->t_fb->tfb_tcp_output(tp);
 			tp->t_flags &= ~TF_FORCEDATA;
 		}
 	}
 out:
 	TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
 		  ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
 	TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
 		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
 	INP_WUNLOCK(inp);
 	/* Matches the conditional INP_INFO_RLOCK taken on entry. */
 	if (flags & PRUS_EOF)
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (error);
 }
 
 /*
  * Mark `count' previously queued not-ready mbufs, starting at m, as ready
  * in the send buffer and, if that succeeds, run the stack's output routine.
  * If the inpcb is in INP_TIMEWAIT or INP_DROPPED the mbufs are freed here
  * and ECONNRESET is returned.
  */
 static int
 tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
 {
 	struct tcpcb *tp;
 	struct inpcb *inp;
 	int error, remaining;
 
 	inp = sotoinpcb(so);
 	INP_WLOCK(inp);
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {
 		INP_WUNLOCK(inp);
 		/* Release exactly `count' mbufs, one at a time. */
 		remaining = count;
 		while (remaining-- > 0)
 			m = m_free(m);
 		return (ECONNRESET);
 	}
 	tp = intotcpcb(inp);
 
 	SOCKBUF_LOCK(&so->so_snd);
 	error = sbready(&so->so_snd, m, count);
 	SOCKBUF_UNLOCK(&so->so_snd);
 	if (error != 0) {
 		INP_WUNLOCK(inp);
 		return (error);
 	}
 	error = tp->t_fb->tfb_tcp_output(tp);
 	INP_WUNLOCK(inp);
 
 	return (error);
 }
 
 /*
  * Abort the connection: drop it abruptly with ECONNABORTED rather than
  * going through an orderly shutdown.
  */
 static void
 tcp_usr_abort(struct socket *so)
 {
 	struct tcpcb *tp = NULL;
 	struct inpcb *inp;
 	TCPDEBUG0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	KASSERT(inp->inp_socket != NULL,
 	    ("tcp_usr_abort: inp_socket == NULL"));
 
 	/*
 	 * Only drop when the full TCP state is still present, i.e. the
 	 * inpcb is neither in TIMEWAIT nor already dropped.
 	 */
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0) {
 		tp = intotcpcb(inp);
 		TCPDEBUG1();
 		tcp_drop(tp, ECONNABORTED);
 		TCPDEBUG2(PRU_ABORT);
 		TCP_PROBE2(debug__user, tp, PRU_ABORT);
 	}
 	/*
 	 * Re-read the flags: if the inpcb is (still) not dropped, mark the
 	 * socket and the inpcb with SS_PROTOREF / INP_SOCKREF.
 	 */
 	if ((inp->inp_flags & INP_DROPPED) == 0) {
 		SOCK_LOCK(so);
 		so->so_state |= SS_PROTOREF;
 		SOCK_UNLOCK(so);
 		inp->inp_flags |= INP_SOCKREF;
 	}
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 
 /*
  * The user closed the TCP socket: begin a friendly disconnect via
  * tcp_disconnect().
  */
 static void
 tcp_usr_close(struct socket *so)
 {
 	struct tcpcb *tp = NULL;
 	struct inpcb *inp;
 	TCPDEBUG0;
 
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 	INP_WLOCK(inp);
 	KASSERT(inp->inp_socket != NULL,
 	    ("tcp_usr_close: inp_socket == NULL"));
 
 	/*
 	 * Only initiate the disconnect when the full TCP state is still
 	 * present, i.e. the inpcb is neither in TIMEWAIT nor already
 	 * dropped.
 	 */
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0) {
 		tp = intotcpcb(inp);
 		TCPDEBUG1();
 		tcp_disconnect(tp);
 		TCPDEBUG2(PRU_CLOSE);
 		TCP_PROBE2(debug__user, tp, PRU_CLOSE);
 	}
 	/*
 	 * Re-read the flags: if the inpcb is (still) not dropped, mark the
 	 * socket and the inpcb with SS_PROTOREF / INP_SOCKREF.
 	 */
 	if ((inp->inp_flags & INP_DROPPED) == 0) {
 		SOCK_LOCK(so);
 		so->so_state |= SS_PROTOREF;
 		SOCK_UNLOCK(so);
 		inp->inp_flags |= INP_SOCKREF;
 	}
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 
 /*
  * Fetch the pending out-of-band byte into m.
  *
  * Returns ECONNRESET if the inpcb is in INP_TIMEWAIT or INP_DROPPED,
  * EINVAL if there is no OOB mark, OOB data is delivered inline, or the
  * byte was already consumed, and EWOULDBLOCK if the byte has not arrived
  * yet.  With MSG_PEEK the byte is left available for a later call.
  */
 static int
 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
 {
 	struct tcpcb *tp = NULL;
 	struct inpcb *inp;
 	int error = 0;
 
 	TCPDEBUG0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
 	INP_WLOCK(inp);
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {
 		error = ECONNRESET;
 		goto done;
 	}
 	tp = intotcpcb(inp);
 	TCPDEBUG1();
 
 	if (so->so_oobmark == 0 &&
 	    (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) {
 		/* No urgent mark is pending. */
 		error = EINVAL;
 	} else if ((so->so_options & SO_OOBINLINE) != 0) {
 		/* OOB data is delivered inline with normal data. */
 		error = EINVAL;
 	} else if ((tp->t_oobflags & TCPOOB_HADDATA) != 0) {
 		/* The OOB byte has already been read. */
 		error = EINVAL;
 	} else if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
 		error = EWOULDBLOCK;
 	} else {
 		m->m_len = 1;
 		*mtod(m, caddr_t) = tp->t_iobc;
 		/* Unless peeking, flip HAVEDATA off and HADDATA on. */
 		if ((flags & MSG_PEEK) == 0)
 			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
 	}
 
 done:
 	TCPDEBUG2(PRU_RCVOOB);
 	TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
 	INP_WUNLOCK(inp);
 	return (error);
 }
 
 #ifdef INET
 /*
  * User-request dispatch table for TCP over IPv4.  The accept, bind,
  * connect and listen entries use the IPv4-specific tcp_usr_*() handlers,
  * and control/peeraddr/sockaddr use the generic in_*() routines; the
  * remaining handlers are the family-independent tcp_usr_*() functions.
  */
 struct pr_usrreqs tcp_usrreqs = {
 	.pru_abort =		tcp_usr_abort,
 	.pru_accept =		tcp_usr_accept,
 	.pru_attach =		tcp_usr_attach,
 	.pru_bind =		tcp_usr_bind,
 	.pru_connect =		tcp_usr_connect,
 	.pru_control =		in_control,
 	.pru_detach =		tcp_usr_detach,
 	.pru_disconnect =	tcp_usr_disconnect,
 	.pru_listen =		tcp_usr_listen,
 	.pru_peeraddr =		in_getpeeraddr,
 	.pru_rcvd =		tcp_usr_rcvd,
 	.pru_rcvoob =		tcp_usr_rcvoob,
 	.pru_send =		tcp_usr_send,
 	.pru_ready =		tcp_usr_ready,
 	.pru_shutdown =		tcp_usr_shutdown,
 	.pru_sockaddr =		in_getsockaddr,
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		tcp_usr_close,
 };
 #endif /* INET */
 
 #ifdef INET6
 /*
  * User-request dispatch table for TCP over IPv6.  The accept, bind,
  * connect and listen entries use the tcp6_usr_*() handlers, control uses
  * in6_control, and peeraddr/sockaddr use the in6_mapped_*() routines; the
  * remaining handlers are the family-independent tcp_usr_*() functions.
  */
 struct pr_usrreqs tcp6_usrreqs = {
 	.pru_abort =		tcp_usr_abort,
 	.pru_accept =		tcp6_usr_accept,
 	.pru_attach =		tcp_usr_attach,
 	.pru_bind =		tcp6_usr_bind,
 	.pru_connect =		tcp6_usr_connect,
 	.pru_control =		in6_control,
 	.pru_detach =		tcp_usr_detach,
 	.pru_disconnect =	tcp_usr_disconnect,
 	.pru_listen =		tcp6_usr_listen,
 	.pru_peeraddr =		in6_mapped_peeraddr,
 	.pru_rcvd =		tcp_usr_rcvd,
 	.pru_rcvoob =		tcp_usr_rcvoob,
 	.pru_send =		tcp_usr_send,
 	.pru_ready =		tcp_usr_ready,
 	.pru_shutdown =		tcp_usr_shutdown,
 	.pru_sockaddr =		in6_mapped_sockaddr,
 	.pru_sosetlabel =	in_pcbsosetlabel,
 	.pru_close =		tcp_usr_close,
 };
 #endif /* INET6 */
 
 #ifdef INET
 /*
  * Common subroutine to open a TCP connection to the remote host specified
  * by the sockaddr nam.  Calls in_pcbbind to assign a local port number if
  * none is set yet, then in_pcbconnect_setup to choose the local address
  * and fill in the foreign address and port.  If in_pcbconnect_setup
  * reports an existing inpcb for the same connection (oinp), the call fails
  * with EADDRINUSE.  Otherwise the inpcb is rehashed, the window scale to
  * request is computed, and the connection enters SYN-SENT with a fresh
  * initial send sequence number.
  *
  * Must be called with the inpcb write-locked; the pcbinfo hash write lock
  * is taken and released internally.  Returns 0 or an errno value.
  */
 static int
 tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp = tp->t_inpcb, *oinp;
 	struct socket *so = inp->inp_socket;
 	struct in_addr laddr;
 	u_short lport;
 	int error;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK(&V_tcbinfo);
 
 	/* No local port yet: bind one before connecting. */
 	if (inp->inp_lport == 0) {
 		error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 		if (error)
 			goto out;
 	}
 
 	/*
 	 * Cannot simply call in_pcbconnect, because there might be an
 	 * earlier incarnation of this same connection still in
 	 * TIME_WAIT state, creating an ADDRINUSE error.
 	 */
 	laddr = inp->inp_laddr;
 	lport = inp->inp_lport;
 	error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
 	    &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
 	if (error && oinp == NULL)
 		goto out;
 	/* Any existing inpcb for this connection means the tuple is taken. */
 	if (oinp) {
 		error = EADDRINUSE;
 		goto out;
 	}
 	/* Commit the chosen local address; lport is not written back. */
 	inp->inp_laddr = laddr;
 	in_pcbrehash(inp);
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 
 	/*
 	 * Compute window scaling to request:
 	 * Scale to fit into sweet spot.  See tcp_syncache.c.
 	 * XXX: This should move to tcp_output().
 	 */
 	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
 	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
 		tp->request_r_scale++;
 
 	soisconnecting(so);
 	TCPSTAT_INC(tcps_connattempt);
 	tcp_state_change(tp, TCPS_SYN_SENT);
 	tp->iss = tcp_new_isn(tp);
 	tcp_sendseqinit(tp);
 
 	return 0;
 
 out:
 	/* Error exit: the hash lock is still held on every path that gets here. */
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	return (error);
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * IPv6 counterpart of the connection-open subroutine: bind a local port
  * with in6_pcbbind if none is set, connect the inpcb with in6_pcbconnect,
  * compute the window scale to request, and enter SYN-SENT with a fresh
  * initial send sequence number.
  *
  * Must be called with the inpcb write-locked; the pcbinfo hash write lock
  * is taken and released internally.  Returns 0 or the errno value from
  * in6_pcbbind/in6_pcbconnect.
  */
 static int
 tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	int error;
 
 	INP_WLOCK_ASSERT(inp);
 	INP_HASH_WLOCK(&V_tcbinfo);
 
 	/* No local port yet: bind one before connecting. */
 	if (inp->inp_lport == 0) {
 		error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
 		if (error)
 			goto out;
 	}
 	error = in6_pcbconnect(inp, nam, td->td_ucred);
 	if (error != 0)
 		goto out;
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 
 	/* Compute window scaling to request.  */
 	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
 	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
 		tp->request_r_scale++;
 
 	soisconnecting(inp->inp_socket);
 	TCPSTAT_INC(tcps_connattempt);
 	tcp_state_change(tp, TCPS_SYN_SENT);
 	tp->iss = tcp_new_isn(tp);
 	tcp_sendseqinit(tp);
 
 	return 0;
 
 out:
 	/* Error exit: the hash lock is still held on every path that gets here. */
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	return error;
 }
 #endif /* INET6 */
 
 /*
  * Export TCP internal state information via a struct tcp_info, based on the
  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
  * (TCP state machine, etc).  We export all information using FreeBSD-native
  * constants -- for example, the numeric values for tcpi_state will differ
  * from Linux.
  */
 static void
 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
 {
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 	bzero(ti, sizeof(*ti));
 
 	ti->tcpi_state = tp->t_state;
 	if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
 		ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
 	if (tp->t_flags & TF_SACK_PERMIT)
 		ti->tcpi_options |= TCPI_OPT_SACK;
 	if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
 		ti->tcpi_options |= TCPI_OPT_WSCALE;
 		ti->tcpi_snd_wscale = tp->snd_scale;
 		ti->tcpi_rcv_wscale = tp->rcv_scale;
 	}
 
 	ti->tcpi_rto = tp->t_rxtcur * tick;
 	ti->tcpi_last_data_recv = (long)(ticks - (int)tp->t_rcvtime) * tick;
 	ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
 	ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
 
 	ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
 	ti->tcpi_snd_cwnd = tp->snd_cwnd;
 
 	/*
 	 * FreeBSD-specific extension fields for tcp_info.
 	 */
 	ti->tcpi_rcv_space = tp->rcv_wnd;
 	ti->tcpi_rcv_nxt = tp->rcv_nxt;
 	ti->tcpi_snd_wnd = tp->snd_wnd;
 	ti->tcpi_snd_bwnd = 0;		/* Unused, kept for compat. */
 	ti->tcpi_snd_nxt = tp->snd_nxt;
 	ti->tcpi_snd_mss = tp->t_maxseg;
 	ti->tcpi_rcv_mss = tp->t_maxseg;
 	if (tp->t_flags & TF_TOE)
 		ti->tcpi_options |= TCPI_OPT_TOE;
 	ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
 	ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
 	ti->tcpi_snd_zerowin = tp->t_sndzerowin;
 }
 
 /*
  * tcp_ctloutput() must drop the inpcb lock before performing copyin on
  * socket option arguments.  When it re-acquires the lock after the copy, it
  * has to revalidate that the connection is still valid for the socket
  * option.
  *
  * Note that this macro is not self-contained: on INP_TIMEWAIT/INP_DROPPED
  * it unlocks and *returns ECONNRESET from the enclosing function*, and on
  * success it assigns the enclosing function's local variable `tp'.
  */
 #define INP_WLOCK_RECHECK(inp) do {					\
 	INP_WLOCK(inp);							\
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {		\
 		INP_WUNLOCK(inp);					\
 		return (ECONNRESET);					\
 	}								\
 	tp = intotcpcb(inp);						\
 } while(0)
 
 /*
  * Socket-option entry point for TCP sockets.
  *
  * Options at a level other than IPPROTO_TCP are handed to ip6_ctloutput()
  * or ip_ctloutput().  TCP_FUNCTION_BLK is handled here for both directions
  * so that a stack-specific handler can never override it; every other
  * IPPROTO_TCP option is passed, with the inpcb still write-locked, to the
  * current function block's tfb_tcp_ctloutput method, which must unlock it.
  *
  * Returns 0 or an errno value: ECONNRESET if the inpcb is in INP_TIMEWAIT
  * or INP_DROPPED, EINVAL when switching function blocks on a connection
  * that has left TCPS_CLOSED, ENOENT if the requested block is unknown or
  * being removed, or whatever the copyin/copyout/delegate returns.
  */
 int
 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	int	error;
 	struct	inpcb *inp;
 	struct	tcpcb *tp;
 	struct tcp_function_block *blk;
 	struct tcp_function_set fsn;
 
 	error = 0;
 	inp = sotoinpcb(so);
 	KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
 	INP_WLOCK(inp);
 	if (sopt->sopt_level != IPPROTO_TCP) {
 #ifdef INET6
 		if (inp->inp_vflag & INP_IPV6PROTO) {
 			INP_WUNLOCK(inp);
 			error = ip6_ctloutput(so, sopt);
 		}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 		else
 #endif
 #ifdef INET
 		{
 			INP_WUNLOCK(inp);
 			error = ip_ctloutput(so, sopt);
 		}
 #endif
 		return (error);
 	}
 	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 		INP_WUNLOCK(inp);
 		return (ECONNRESET);
 	}
 	tp = intotcpcb(inp);
 	/*
 	 * Protect the TCP option TCP_FUNCTION_BLK so
 	 * that a sub-function can *never* overwrite this.
 	 */
 	if ((sopt->sopt_dir == SOPT_SET) && 
 	    (sopt->sopt_name == TCP_FUNCTION_BLK)) {
 		/* The lock cannot be held across the copyin. */
 		INP_WUNLOCK(inp);
 		error = sooptcopyin(sopt, &fsn, sizeof fsn,
 		    sizeof fsn);
 		if (error)
 			return (error);
 		/*
 		 * The name comes from userland; force termination so the
 		 * lookup below can never run past the end of the buffer.
 		 */
 		fsn.function_set_name[sizeof(fsn.function_set_name) - 1] =
 		    '\0';
 		INP_WLOCK_RECHECK(inp);
 		if (tp->t_state != TCPS_CLOSED) {
 			/* 
 			 * The user has advanced the state
 			 * past the initial point, we can't
 			 * switch since we are down the road
 			 * and a new set of functions may
 			 * not be compatible.
 			 */
 			INP_WUNLOCK(inp);
 			return(EINVAL);
 		}
 		blk = find_and_ref_tcp_functions(&fsn);
 		if (blk == NULL) {
 			INP_WUNLOCK(inp);
 			return (ENOENT);
 		}
 		if (tp->t_fb != blk) {
 			if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
 				refcount_release(&blk->tfb_refcnt);
 				INP_WUNLOCK(inp);
 				return (ENOENT);
 			}
 			/* 
 			 * Release the old refcnt, the
 			 * lookup acquires a ref on the
 			 * new one.
 			 */
 			if (tp->t_fb->tfb_tcp_fb_fini)
 				(*tp->t_fb->tfb_tcp_fb_fini)(tp);
 			refcount_release(&tp->t_fb->tfb_refcnt);
 			tp->t_fb = blk;
 			if (tp->t_fb->tfb_tcp_fb_init) {
 				(*tp->t_fb->tfb_tcp_fb_init)(tp);
 			}
 		} else {
 			/*
 			 * The connection already uses this block and
 			 * holds its own reference; drop the extra one
 			 * the lookup just took so it is not leaked.
 			 */
 			refcount_release(&blk->tfb_refcnt);
 		}
 #ifdef TCP_OFFLOAD
 		if (tp->t_flags & TF_TOE) {
 			tcp_offload_ctloutput(tp, sopt->sopt_dir,
 			     sopt->sopt_name);
 		}
 #endif
 		INP_WUNLOCK(inp);
 		return (error);
 	} else if ((sopt->sopt_dir == SOPT_GET) && 
 	    (sopt->sopt_name == TCP_FUNCTION_BLK)) {
 		/*
 		 * Zero the whole structure before filling it in: it lives
 		 * on the kernel stack and is copied out in full, so bytes
 		 * past the name's terminator and any padding must not
 		 * carry stale stack contents.  The copy is bounded by the
 		 * destination size.
 		 */
 		bzero(&fsn, sizeof(fsn));
 		strlcpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name,
 		    sizeof(fsn.function_set_name));
 		fsn.pcbcnt = tp->t_fb->tfb_refcnt;
 		INP_WUNLOCK(inp);
 		error = sooptcopyout(sopt, &fsn, sizeof fsn);
 		return (error);
 	}
 	/* Pass in the INP locked, callee must unlock it */
 	return (tp->t_fb->tfb_tcp_ctloutput(so, sopt, inp, tp));
 }
 
 /*
  * Default handler for IPPROTO_TCP-level socket options.
  *
  * Entered with the inpcb write-locked (tcp_ctloutput() passes it in
  * locked); every SOPT_SET and SOPT_GET path drops that lock before
  * returning.  For SOPT_SET the lock is released around sooptcopyin() and
  * re-taken with INP_WLOCK_RECHECK(), which returns ECONNRESET from this
  * function if the inpcb went to INP_TIMEWAIT/INP_DROPPED in the meantime.
  * For SOPT_GET the value is sampled under the lock, the lock is dropped,
  * and the value is copied out.
  *
  * so:   socket the option applies to.
  * sopt: option descriptor (direction, name, user buffer).
  * inp:  the socket's inpcb, write-locked on entry.
  * tp:   the connection's tcpcb.
  *
  * Returns 0 or an errno value; unknown options yield ENOPROTOOPT.
  */
 int
 tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
 {
 	int	error, opt, optval;
 	u_int	ui;
 	struct	tcp_info ti;
 	struct cc_algo *algo;
 	char buf[TCP_CA_NAME_MAX];
 	
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 #ifdef TCP_SIGNATURE
 		case TCP_MD5SIG:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			if (optval > 0)
 				tp->t_flags |= TF_SIGNATURE;
 			else
 				tp->t_flags &= ~TF_SIGNATURE;
 			goto unlock_and_done;
 #endif /* TCP_SIGNATURE */
 
 		case TCP_NODELAY:
 		case TCP_NOOPT:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			switch (sopt->sopt_name) {
 			case TCP_NODELAY:
 				opt = TF_NODELAY;
 				break;
 			case TCP_NOOPT:
 				opt = TF_NOOPT;
 				break;
 			default:
 				opt = 0; /* dead code to fool gcc */
 				break;
 			}
 
 			if (optval)
 				tp->t_flags |= opt;
 			else
 				tp->t_flags &= ~opt;
 			/*
 			 * Common exit for SOPT_SET cases that hold the inpcb
 			 * lock: notify the TOE if offloaded, then unlock.
 			 */
 unlock_and_done:
 #ifdef TCP_OFFLOAD
 			if (tp->t_flags & TF_TOE) {
 				tcp_offload_ctloutput(tp, sopt->sopt_dir,
 				    sopt->sopt_name);
 			}
 #endif
 			INP_WUNLOCK(inp);
 			break;
 
 		case TCP_NOPUSH:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			if (optval)
 				tp->t_flags |= TF_NOPUSH;
 			else if (tp->t_flags & TF_NOPUSH) {
 				/* Clearing NOPUSH: push out anything held back. */
 				tp->t_flags &= ~TF_NOPUSH;
 				if (TCPS_HAVEESTABLISHED(tp->t_state))
 					error = tp->t_fb->tfb_tcp_output(tp);
 			}
 			goto unlock_and_done;
 
 		case TCP_MAXSEG:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			/* The segment size may only be lowered, within limits. */
 			if (optval > 0 && optval <= tp->t_maxseg &&
 			    optval + 40 >= V_tcp_minmss)
 				tp->t_maxseg = optval;
 			else
 				error = EINVAL;
 			goto unlock_and_done;
 
 		case TCP_INFO:
 			/* TCP_INFO is read-only. */
 			INP_WUNLOCK(inp);
 			error = EINVAL;
 			break;
 
 		case TCP_CONGESTION:
 			INP_WUNLOCK(inp);
 			bzero(buf, sizeof(buf));
 			error = sooptcopyin(sopt, &buf, sizeof(buf), 1);
 			if (error)
 				break;
 			INP_WLOCK_RECHECK(inp);
 			/*
 			 * Return EINVAL if we can't find the requested cc algo.
 			 */
 			error = EINVAL;
 			CC_LIST_RLOCK();
 			STAILQ_FOREACH(algo, &cc_list, entries) {
 				if (strncmp(buf, algo->name, TCP_CA_NAME_MAX)
 				    == 0) {
 					/* We've found the requested algo. */
 					error = 0;
 					/*
 					 * We hold a write lock over the tcb
 					 * so it's safe to do these things
 					 * without ordering concerns.
 					 */
 					if (CC_ALGO(tp)->cb_destroy != NULL)
 						CC_ALGO(tp)->cb_destroy(tp->ccv);
 					CC_ALGO(tp) = algo;
 					/*
 					 * If something goes pear shaped
 					 * initialising the new algo,
 					 * fall back to newreno (which
 					 * does not require initialisation).
 					 */
 					if (algo->cb_init != NULL)
 						if (algo->cb_init(tp->ccv) > 0) {
 							CC_ALGO(tp) = &newreno_cc_algo;
 							/*
 							 * The only reason init
 							 * should fail is
 							 * because of malloc.
 							 */
 							error = ENOMEM;
 						}
 					break; /* Break the STAILQ_FOREACH. */
 				}
 			}
 			CC_LIST_RUNLOCK();
 			goto unlock_and_done;
 
 		case TCP_KEEPIDLE:
 		case TCP_KEEPINTVL:
 		case TCP_KEEPINIT:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 			if (error)
 				return (error);
 
 			/* Reject values whose conversion to ticks would overflow. */
 			if (ui > (UINT_MAX / hz)) {
 				error = EINVAL;
 				break;
 			}
 			ui *= hz;
 
 			INP_WLOCK_RECHECK(inp);
 			switch (sopt->sopt_name) {
 			case TCP_KEEPIDLE:
 				tp->t_keepidle = ui;
 				/*
 				 * XXX: better check current remaining
 				 * timeout and "merge" it with new value.
 				 */
 				if ((tp->t_state > TCPS_LISTEN) &&
 				    (tp->t_state <= TCPS_CLOSING))
 					tcp_timer_activate(tp, TT_KEEP,
 					    TP_KEEPIDLE(tp));
 				break;
 			case TCP_KEEPINTVL:
 				tp->t_keepintvl = ui;
 				if ((tp->t_state == TCPS_FIN_WAIT_2) &&
 				    (TP_MAXIDLE(tp) > 0))
 					tcp_timer_activate(tp, TT_2MSL,
 					    TP_MAXIDLE(tp));
 				break;
 			case TCP_KEEPINIT:
 				tp->t_keepinit = ui;
 				if (tp->t_state == TCPS_SYN_RECEIVED ||
 				    tp->t_state == TCPS_SYN_SENT)
 					tcp_timer_activate(tp, TT_KEEP,
 					    TP_KEEPINIT(tp));
 				break;
 			}
 			goto unlock_and_done;
 
 		case TCP_KEEPCNT:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			tp->t_keepcnt = ui;
 			if ((tp->t_state == TCPS_FIN_WAIT_2) &&
 			    (TP_MAXIDLE(tp) > 0))
 				tcp_timer_activate(tp, TT_2MSL,
 				    TP_MAXIDLE(tp));
 			goto unlock_and_done;
 
 #ifdef TCPPCAP
 		case TCP_PCAP_OUT:
 		case TCP_PCAP_IN:
 			INP_WUNLOCK(inp);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			/*
 			 * Pick the queue from the option actually requested.
 			 * Testing the TCP_PCAP_OUT constant itself is always
 			 * true and would send TCP_PCAP_IN to the outbound
 			 * queue as well.
 			 */
 			if (optval >= 0)
 				tcp_pcap_set_sock_max(
 					(sopt->sopt_name == TCP_PCAP_OUT) ?
 					&(tp->t_outpkts) : &(tp->t_inpkts),
 					optval);
 			else
 				error = EINVAL;
 			goto unlock_and_done;
 #endif
 
 #ifdef TCP_RFC7413
 		case TCP_FASTOPEN:
 			INP_WUNLOCK(inp);
 			if (!V_tcp_fastopen_enabled)
 				return (EPERM);
 
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			INP_WLOCK_RECHECK(inp);
 			if (optval) {
 				tp->t_flags |= TF_FASTOPEN;
 				if ((tp->t_state == TCPS_LISTEN) &&
 				    (tp->t_tfo_pending == NULL))
 					tp->t_tfo_pending =
 					    tcp_fastopen_alloc_counter();
 			} else
 				tp->t_flags &= ~TF_FASTOPEN;
 			goto unlock_and_done;
 #endif
 
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		tp = intotcpcb(inp);
 		switch (sopt->sopt_name) {
 #ifdef TCP_SIGNATURE
 		case TCP_MD5SIG:
 			optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 #endif
 
 		case TCP_NODELAY:
 			optval = tp->t_flags & TF_NODELAY;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		case TCP_MAXSEG:
 			optval = tp->t_maxseg;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		case TCP_NOOPT:
 			optval = tp->t_flags & TF_NOOPT;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		case TCP_NOPUSH:
 			optval = tp->t_flags & TF_NOPUSH;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		case TCP_INFO:
 			tcp_fill_info(tp, &ti);
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &ti, sizeof ti);
 			break;
 		case TCP_CONGESTION:
 			bzero(buf, sizeof(buf));
 			strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX);
 			break;
 		case TCP_KEEPIDLE:
 		case TCP_KEEPINTVL:
 		case TCP_KEEPINIT:
 		case TCP_KEEPCNT:
 			/* Timers are stored in ticks; report them in seconds. */
 			switch (sopt->sopt_name) {
 			case TCP_KEEPIDLE:
 				ui = tp->t_keepidle / hz;
 				break;
 			case TCP_KEEPINTVL:
 				ui = tp->t_keepintvl / hz;
 				break;
 			case TCP_KEEPINIT:
 				ui = tp->t_keepinit / hz;
 				break;
 			case TCP_KEEPCNT:
 				ui = tp->t_keepcnt;
 				break;
 			}
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &ui, sizeof(ui));
 			break;
 #ifdef TCPPCAP
 		case TCP_PCAP_OUT:
 		case TCP_PCAP_IN:
 			/* Select the queue by the requested option name. */
 			optval = tcp_pcap_get_sock_max(
 					(sopt->sopt_name == TCP_PCAP_OUT) ?
 					&(tp->t_outpkts) : &(tp->t_inpkts));
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 #endif
 
 #ifdef TCP_RFC7413
 		case TCP_FASTOPEN:
 			optval = tp->t_flags & TF_FASTOPEN;
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 #endif
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 #undef INP_WLOCK_RECHECK
 
 /*
  * Attach TCP protocol to socket: reserve buffer space if the socket has
  * none yet, allocate the internet protocol control block and the tcp
  * control block, and leave the new connection in the CLOSED state.
  *
  * Returns 0 on success, the error from soreserve() or in_pcballoc(), or
  * ENOBUFS if the tcpcb cannot be allocated (in which case the inpcb is
  * detached and freed again).
  */
 static int
 tcp_attach(struct socket *so)
 {
 	struct tcpcb *tp;
 	struct inpcb *inp;
 	int error;
 
 	/* Only reserve default buffer space if neither limit is set yet. */
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
 		if (error)
 			return (error);
 	}
 	so->so_rcv.sb_flags |= SB_AUTOSIZE;
 	so->so_snd.sb_flags |= SB_AUTOSIZE;
 	INP_INFO_RLOCK(&V_tcbinfo);
 	error = in_pcballoc(so, &V_tcbinfo);
 	if (error) {
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return (error);
 	}
 	/*
 	 * NOTE(review): the inpcb is unlocked below without being locked in
 	 * this function, so in_pcballoc() presumably returns it write-locked
 	 * -- confirm against in_pcballoc().
 	 */
 	inp = sotoinpcb(so);
 #ifdef INET6
 	if (inp->inp_vflag & INP_IPV6PROTO) {
 		inp->inp_vflag |= INP_IPV6;
 		inp->in6p_hops = -1;	/* use kernel default */
 	}
 	else
 #endif
 	inp->inp_vflag |= INP_IPV4;
 	tp = tcp_newtcpcb(inp);
 	if (tp == NULL) {
 		in_pcbdetach(inp);
 		in_pcbfree(inp);
 		INP_INFO_RUNLOCK(&V_tcbinfo);
 		return (ENOBUFS);
 	}
 	tp->t_state = TCPS_CLOSED;
 	INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (0);
 }
 
 /*
  * Start (or continue) tearing down a connection.
  *
  * - A connection that has not reached ESTABLISHED is simply closed.
  * - With SO_LINGER set and a zero linger time, the connection is dropped.
  * - Otherwise the socket is marked disconnecting, pending input is
  *   flushed, the state machine is advanced as for a user close, and, if
  *   the inpcb has not been marked dropped, the output routine is called
  *   so that a segment (with FIN) goes to the peer.
  *
  * The caller must hold the tcbinfo read lock and the inpcb write lock.
  */
 static void
 tcp_disconnect(struct tcpcb *tp)
 {
 	struct inpcb *pcb;
 	struct socket *sock;
 
 	pcb = tp->t_inpcb;
 	sock = pcb->inp_socket;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(pcb);
 
 	/*
 	 * The socket is still open, so neither tcp_close() nor tcp_drop()
 	 * is expected to hand back NULL below.
 	 */
 	if (tp->t_state < TCPS_ESTABLISHED) {
 		tp = tcp_close(tp);
 		KASSERT(tp != NULL,
 		    ("tcp_disconnect: tcp_close() returned NULL"));
 		return;
 	}
 
 	if ((sock->so_options & SO_LINGER) != 0 && sock->so_linger == 0) {
 		tp = tcp_drop(tp, 0);
 		KASSERT(tp != NULL,
 		    ("tcp_disconnect: tcp_drop() returned NULL"));
 		return;
 	}
 
 	soisdisconnecting(sock);
 	sbflush(&sock->so_rcv);
 	tcp_usrclosed(tp);
 	if ((pcb->inp_flags & INP_DROPPED) == 0)
 		tp->t_fb->tfb_tcp_output(tp);
 }
 
 /*
  * The user has closed the socket; walk the connection through the
  * shutdown states:
  *  - LISTEN or CLOSED (no SYN was ever received): forget the connection.
  *  - SYN_SENT or SYN_RECEIVED: note that a FIN is still owed (TF_NEEDFIN).
  *  - ESTABLISHED: enter FIN_WAIT_1 so that a FIN is sent to the peer.
  *  - CLOSE_WAIT (peer's FIN already seen): enter LAST_ACK.
  * In all other states a FIN has already been sent (e.g. after
  * PRU_SHUTDOWN) and we merely wait for the peer to send its FIN or to stop
  * answering keep-alives.  The user may leave close as soon as our FIN has
  * been acked.
  *
  * The caller must hold the tcbinfo read lock and the inpcb write lock.
  */
 static void
 tcp_usrclosed(struct tcpcb *tp)
 {
 	int delay;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	switch (tp->t_state) {
 	case TCPS_SYN_SENT:
 	case TCPS_SYN_RECEIVED:
 		tp->t_flags |= TF_NEEDFIN;
 		break;
 
 	case TCPS_ESTABLISHED:
 		tcp_state_change(tp, TCPS_FIN_WAIT_1);
 		break;
 
 	case TCPS_CLOSE_WAIT:
 		tcp_state_change(tp, TCPS_LAST_ACK);
 		break;
 
 	case TCPS_LISTEN:
 #ifdef TCP_OFFLOAD
 		tcp_offload_listen_stop(tp);
 #endif
 		tcp_state_change(tp, TCPS_CLOSED);
 		/* FALLTHROUGH */
 	case TCPS_CLOSED:
 		/*
 		 * The socket is still open, so tcp_close() should never
 		 * return NULL here.
 		 */
 		tp = tcp_close(tp);
 		KASSERT(tp != NULL,
 		    ("tcp_usrclosed: tcp_close() returned NULL"));
 		break;
 	}
 
 	if (tp->t_state < TCPS_FIN_WAIT_2)
 		return;
 
 	soisdisconnected(tp->t_inpcb->inp_socket);
 	if (tp->t_state != TCPS_FIN_WAIT_2)
 		return;
 
 	/* Prevent the connection hanging in FIN_WAIT_2 forever. */
 	if (tcp_fast_finwait2_recycle)
 		delay = tcp_finwait2_timeout;
 	else
 		delay = TP_MAXIDLE(tp);
 	tcp_timer_activate(tp, TT_2MSL, delay);
 }
 
 #ifdef DDB
 /*
  * Emit `indent' space characters on the debugger console; a value of
  * zero or less prints nothing.
  */
 static void
 db_print_indent(int indent)
 {
 	int remaining;
 
 	for (remaining = indent; remaining > 0; remaining--)
 		db_printf(" ");
 }
 
 /*
  * Print the symbolic name of a TCP connection state on the debugger
  * console.  A value matching none of the TCPS_* cases prints "unknown".
  */
 static void
 db_print_tstate(int t_state)
 {
 	const char *label;
 
 	switch (t_state) {
 	case TCPS_CLOSED:
 		label = "TCPS_CLOSED";
 		break;
 	case TCPS_LISTEN:
 		label = "TCPS_LISTEN";
 		break;
 	case TCPS_SYN_SENT:
 		label = "TCPS_SYN_SENT";
 		break;
 	case TCPS_SYN_RECEIVED:
 		label = "TCPS_SYN_RECEIVED";
 		break;
 	case TCPS_ESTABLISHED:
 		label = "TCPS_ESTABLISHED";
 		break;
 	case TCPS_CLOSE_WAIT:
 		label = "TCPS_CLOSE_WAIT";
 		break;
 	case TCPS_FIN_WAIT_1:
 		label = "TCPS_FIN_WAIT_1";
 		break;
 	case TCPS_CLOSING:
 		label = "TCPS_CLOSING";
 		break;
 	case TCPS_LAST_ACK:
 		label = "TCPS_LAST_ACK";
 		break;
 	case TCPS_FIN_WAIT_2:
 		label = "TCPS_FIN_WAIT_2";
 		break;
 	case TCPS_TIME_WAIT:
 		label = "TCPS_TIME_WAIT";
 		break;
 	default:
 		label = "unknown";
 		break;
 	}
 	db_printf("%s", label);
 }
 
 /*
  * Print the names of the TF_* bits that are set in t_flags, separated by
  * ", ".  Nothing is printed when none of the listed bits is set.
  *
  * The table is walked in order, so its order is the output order.  Each
  * name is spelled exactly like the TF_* macro it stands for.
  */
 static void
 db_print_tflags(u_int t_flags)
 {
 	static const struct {
 		u_int		flag;
 		const char	*name;
 	} tflag_names[] = {
 		{ TF_ACKNOW,		"TF_ACKNOW" },
 		{ TF_DELACK,		"TF_DELACK" },
 		{ TF_NODELAY,		"TF_NODELAY" },
 		{ TF_NOOPT,		"TF_NOOPT" },
 		{ TF_SENTFIN,		"TF_SENTFIN" },
 		{ TF_REQ_SCALE,		"TF_REQ_SCALE" },
 		{ TF_RCVD_SCALE,	"TF_RCVD_SCALE" },
 		{ TF_REQ_TSTMP,		"TF_REQ_TSTMP" },
 		{ TF_RCVD_TSTMP,	"TF_RCVD_TSTMP" },
 		{ TF_SACK_PERMIT,	"TF_SACK_PERMIT" },
 		{ TF_NEEDSYN,		"TF_NEEDSYN" },
 		{ TF_NEEDFIN,		"TF_NEEDFIN" },
 		{ TF_NOPUSH,		"TF_NOPUSH" },
 		{ TF_PREVVALID,		"TF_PREVVALID" },
 		{ TF_MORETOCOME,	"TF_MORETOCOME" },
 		{ TF_LQ_OVERFLOW,	"TF_LQ_OVERFLOW" },
 		{ TF_LASTIDLE,		"TF_LASTIDLE" },
 		{ TF_RXWIN0SENT,	"TF_RXWIN0SENT" },
 		{ TF_FASTRECOVERY,	"TF_FASTRECOVERY" },
 		{ TF_CONGRECOVERY,	"TF_CONGRECOVERY" },
 		{ TF_WASFRECOVERY,	"TF_WASFRECOVERY" },
 		{ TF_WASCRECOVERY,	"TF_WASCRECOVERY" },
 		{ TF_SIGNATURE,		"TF_SIGNATURE" },
 		{ TF_FORCEDATA,		"TF_FORCEDATA" },
 		{ TF_TSO,		"TF_TSO" },
 		{ TF_TOE,		"TF_TOE" },
 		{ TF_ECN_PERMIT,	"TF_ECN_PERMIT" },
 		{ TF_ECN_SND_CWR,	"TF_ECN_SND_CWR" },
 		{ TF_ECN_SND_ECE,	"TF_ECN_SND_ECE" },
 		{ TF_FASTOPEN,		"TF_FASTOPEN" },
 	};
 	const char *sep;
 	size_t i;
 
 	/* The separator is empty until the first name has been printed. */
 	sep = "";
 	for (i = 0; i < sizeof(tflag_names) / sizeof(tflag_names[0]); i++) {
 		if ((t_flags & tflag_names[i].flag) == 0)
 			continue;
 		db_printf("%s%s", sep, tflag_names[i].name);
 		sep = ", ";
 	}
 }
 
 /*
  * Print the names of the TCPOOB_* bits set in t_oobflags, separated by
  * ", ".  Nothing is printed when neither bit is set.
  */
 static void
 db_print_toobflags(char t_oobflags)
 {
 	const char *sep;
 
 	sep = "";
 	if ((t_oobflags & TCPOOB_HAVEDATA) != 0) {
 		db_printf("%sTCPOOB_HAVEDATA", sep);
 		sep = ", ";
 	}
 	if ((t_oobflags & TCPOOB_HADDATA) != 0)
 		db_printf("%sTCPOOB_HADDATA", sep);
 }
 
 /*
  * Dump a tcpcb on the debugger console.
  *
  * tp:     control block to print; it is dereferenced without any check.
  * name:   label printed on the first line together with the address.
  * indent: number of leading spaces for the first line; the field lines
  *         that follow are indented two columns further.
  *
  * Sequence numbers are printed as 0x-prefixed, zero-padded hex.  The
  * sackblks and sackhint members are not printed.
  */
 static void
 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
 {
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", name, tp);
 
 	indent += 2;
 
 	db_print_indent(indent);
 	db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
 	   LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
 
 	/* The timers are shown by address (inside *tp->t_timers). */
 	db_print_indent(indent);
 	db_printf("tt_rexmt: %p   tt_persist: %p   tt_keep: %p\n",
 	    &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
 
 	db_print_indent(indent);
 	db_printf("tt_2msl: %p   tt_delack: %p   t_inpcb: %p\n", &tp->t_timers->tt_2msl,
 	    &tp->t_timers->tt_delack, tp->t_inpcb);
 
 	db_print_indent(indent);
 	db_printf("t_state: %d (", tp->t_state);
 	db_print_tstate(tp->t_state);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("t_flags: 0x%x (", tp->t_flags);
 	db_print_tflags(tp->t_flags);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("snd_una: 0x%08x   snd_max: 0x%08x   snd_nxt: 0x%08x\n",
 	    tp->snd_una, tp->snd_max, tp->snd_nxt);
 
 	db_print_indent(indent);
 	db_printf("snd_up: 0x%08x   snd_wl1: 0x%08x   snd_wl2: 0x%08x\n",
 	   tp->snd_up, tp->snd_wl1, tp->snd_wl2);
 
 	db_print_indent(indent);
 	db_printf("iss: 0x%08x   irs: 0x%08x   rcv_nxt: 0x%08x\n",
 	    tp->iss, tp->irs, tp->rcv_nxt);
 
 	db_print_indent(indent);
 	db_printf("rcv_adv: 0x%08x   rcv_wnd: %lu   rcv_up: 0x%08x\n",
 	    tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
 
 	db_print_indent(indent);
 	db_printf("snd_wnd: %lu   snd_cwnd: %lu\n",
 	   tp->snd_wnd, tp->snd_cwnd);
 
 	db_print_indent(indent);
 	db_printf("snd_ssthresh: %lu   snd_recover: "
 	    "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
 
 	db_print_indent(indent);
-	db_printf("t_maxopd: %u   t_rcvtime: %u   t_startime: %u\n",
-	    tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
+	db_printf("t_rcvtime: %u   t_startime: %u\n",
+	    tp->t_rcvtime, tp->t_starttime);
 
 	db_print_indent(indent);
 	db_printf("t_rtttime: %u   t_rtseq: 0x%08x\n",
 	    tp->t_rtttime, tp->t_rtseq);
 
 	db_print_indent(indent);
 	db_printf("t_rxtcur: %d   t_maxseg: %u   t_srtt: %d\n",
 	    tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
 
 	db_print_indent(indent);
 	db_printf("t_rttvar: %d   t_rxtshift: %d   t_rttmin: %u   "
 	    "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
 	    tp->t_rttbest);
 
 	db_print_indent(indent);
 	db_printf("t_rttupdated: %lu   max_sndwnd: %lu   t_softerror: %d\n",
 	    tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
 
 	db_print_indent(indent);
 	db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
 	db_print_toobflags(tp->t_oobflags);
 	db_printf(")   t_iobc: 0x%02x\n", tp->t_iobc);
 
 	db_print_indent(indent);
 	db_printf("snd_scale: %u   rcv_scale: %u   request_r_scale: %u\n",
 	    tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
 
 	db_print_indent(indent);
 	db_printf("ts_recent: %u   ts_recent_age: %u\n",
 	    tp->ts_recent, tp->ts_recent_age);
 
 	db_print_indent(indent);
 	db_printf("ts_offset: %u   last_ack_sent: 0x%08x   snd_cwnd_prev: "
 	    "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
 
 	db_print_indent(indent);
 	db_printf("snd_ssthresh_prev: %lu   snd_recover_prev: 0x%08x   "
 	    "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
 	    tp->snd_recover_prev, tp->t_badrxtwin);
 
 	db_print_indent(indent);
 	db_printf("snd_numholes: %d  snd_holes first: %p\n",
 	    tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
 
 	db_print_indent(indent);
 	db_printf("snd_fack: 0x%08x   rcv_numsacks: %d   sack_newdata: "
 	    "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata);
 
 	/* Skip sackblks, sackhint. */
 
 	db_print_indent(indent);
 	db_printf("t_rttlow: %d   rfbuf_ts: %u   rfbuf_cnt: %d\n",
 	    tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
 }
 
 /*
  * Debugger command "show tcpcb <addr>": treat the given address as a
  * struct tcpcb and dump it; without an address, print a usage line.
  */
 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
 {
 
 	if (have_addr) {
 		db_print_tcpcb((struct tcpcb *)addr, "tcpcb", 0);
 		return;
 	}
 	db_printf("usage: show tcpcb <addr>\n");
 }
 #endif
Index: projects/release-pkg/sys/netinet/tcp_var.h
===================================================================
--- projects/release-pkg/sys/netinet/tcp_var.h	(revision 293335)
+++ projects/release-pkg/sys/netinet/tcp_var.h	(revision 293336)
@@ -1,866 +1,865 @@
 /*-
  * Copyright (c) 1982, 1986, 1993, 1994, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_var.h	8.4 (Berkeley) 5/24/95
  * $FreeBSD$
  */
 
 #ifndef _NETINET_TCP_VAR_H_
 #define _NETINET_TCP_VAR_H_
 
 #include <netinet/tcp.h>
 
 #ifdef _KERNEL
 #include <net/vnet.h>
 #include <sys/mbuf.h>
 
 /*
  * Kernel variables for tcp.
  */
 VNET_DECLARE(int, tcp_do_rfc1323);
 #define	V_tcp_do_rfc1323	VNET(tcp_do_rfc1323)
 
 #endif /* _KERNEL */
 
 /*
  * TCP segment queue entry.  Entries are linked through tqe_q into a
  * struct tsegqe_head list, which is the type of the tcpcb's t_segq
  * (segment reassembly queue) member.
  */
 struct tseg_qent {
 	LIST_ENTRY(tseg_qent) tqe_q;	/* list linkage */
 	int	tqe_len;		/* TCP segment data length */
 	struct	tcphdr *tqe_th;		/* a pointer to tcp header */
 	struct	mbuf	*tqe_m;		/* mbuf containing the packet */
 };
 LIST_HEAD(tsegqe_head, tseg_qent);	/* list head type for tseg_qent */
 
 /*
  * One SACK block, described by a pair of sequence numbers.  The tcpcb
  * keeps an array of these in its sackblks[MAX_SACK_BLKS] member.
  */
 struct sackblk {
 	tcp_seq start;		/* start seq no. of sack block */
 	tcp_seq end;		/* end seq no. */
 };
 
 /*
  * One hole in the SACK scoreboard.  Holes are linked through scblink
  * into the tcpcb's snd_holes tail queue.
  */
 struct sackhole {
 	tcp_seq start;		/* start seq no. of hole */
 	tcp_seq end;		/* end seq no. */
 	tcp_seq rxmit;		/* next seq. no in hole to be retransmitted */
 	TAILQ_ENTRY(sackhole) scblink;	/* scoreboard linkage */
 };
 
 /*
  * SACK scoreboard hint; embedded in the tcpcb as its sackhint member.
  * The ispare/_pad1/_pad members are explicit padding/spare slots.
  */
 struct sackhint {
 	struct sackhole	*nexthole;	/* presumably the next hole to
 					 * retransmit from -- confirm */
 	int		sack_bytes_rexmit; /* presumably bytes retransmitted
 					    * from holes -- confirm */
 	tcp_seq		last_sack_ack;	/* Most recent/largest sacked ack */
 
 	int		ispare;		/* explicit pad for 64bit alignment */
 	int             sacked_bytes;	/*
 					 * Total sacked bytes reported by the
 					 * receiver via sack option
 					 */
 	uint32_t	_pad1[1];	/* TBD */
 	uint64_t	_pad[1];	/* TBD */
 };
 
 /*
  * Header template: a 40-byte area for a generic IP header followed by a
  * TCP header.
  */
 struct tcptemp {
 	u_char	tt_ipgen[40]; /* the size must be of max ip header, now IPv6 */
 	struct	tcphdr tt_t;	/* TCP header part of the template */
 };
 
 #define tcp6cb		tcpcb  /* for KAME src sync over BSD*'s */
 
 /*
  * TODO: We have yet to brave plowing into
  * tcp_input() and the pru_usrreq() block.
  * Right now these go to the old standard routines, which
  * are somewhat ok, but in the long term may
  * need to be changed. If we do tackle tcp_input()
  * then we need to get rid of the tcp_do_segment()
  * function below.
  */
 /* Flags for tcp functions */
 #define TCP_FUNC_BEING_REMOVED 0x01   	/* Can no longer be referenced */
 struct tcpcb;
 struct inpcb;
 struct sockopt;
 struct socket;
 
 /*
  * A named table of function pointers.  A tcpcb points at one of these
  * through its t_fb member ("TCP function call block").
  */
 struct tcp_function_block {
 	char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]; /* block name */
 	/* Output routine. */
 	int	(*tfb_tcp_output)(struct tcpcb *);
 	/* Segment processing routine. */
 	void	(*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *,
 			    struct socket *, struct tcpcb *,
 			    int, int, uint8_t,
 			    int);
 	/* Socket option (ctloutput) handler. */
 	int     (*tfb_tcp_ctloutput)(struct socket *so, struct sockopt *sopt,
 			    struct inpcb *inp, struct tcpcb *tp);
 	/* Optional memory allocation/free routine */
 	void	(*tfb_tcp_fb_init)(struct tcpcb *);
 	void	(*tfb_tcp_fb_fini)(struct tcpcb *);
 	/* Optional timers, must define all if you define one */
 	int	(*tfb_tcp_timer_stop_all)(struct tcpcb *);
 	int	(*tfb_tcp_timers_left)(struct tcpcb *);
 	void	(*tfb_tcp_timer_activate)(struct tcpcb *,
 			    uint32_t, u_int);
 	int	(*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
 	void	(*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
 	volatile uint32_t tfb_refcnt;	/* reference count */
 	uint32_t  tfb_flags;		/* e.g. TCP_FUNC_BEING_REMOVED */
 };
 
 /*
  * Tail-queue node that points at one tcp_function_block; struct
  * tcp_funchead is the matching queue head type.
  */
 struct tcp_function {
 	TAILQ_ENTRY(tcp_function) tf_next;	/* queue linkage */
 	struct tcp_function_block *tf_fb;	/* the function block */
 };
 
 TAILQ_HEAD(tcp_funchead, tcp_function);
 
 /*
  * Tcp control block, one per tcp connection; fields:
  * Organized for 16 byte cacheline efficiency.
  *
  * Several members are spares or explicit padding (t_pspare, snd_spare*,
  * t_bw_spare*, t_ispare, t_pspare2, _pad); their sizes vary with the
  * _KERNEL, TCP_RFC7413, TCPPCAP and _LP64 conditionals below.
  */
 struct tcpcb {
 	struct	tsegqe_head t_segq;	/* segment reassembly queue */
 	void	*t_pspare[2];		/* new reassembly queue */
 	int	t_segqlen;		/* segment reassembly queue length */
 	int	t_dupacks;		/* consecutive dup acks recd */
 
 	struct tcp_timer *t_timers;	/* All the TCP timers in one struct */
 
 	struct	inpcb *t_inpcb;		/* back pointer to internet pcb */
 	int	t_state;		/* state of this connection */
 	u_int	t_flags;		/* TF_* flag bits */
 
 	struct	vnet *t_vnet;		/* back pointer to parent vnet */
 
 	tcp_seq	snd_una;		/* sent but unacknowledged */
 	tcp_seq	snd_max;		/* highest sequence number sent;
 					 * used to recognize retransmits
 					 */
 	tcp_seq	snd_nxt;		/* send next */
 	tcp_seq	snd_up;			/* send urgent pointer */
 
 	tcp_seq	snd_wl1;		/* window update seg seq number */
 	tcp_seq	snd_wl2;		/* window update seg ack number */
 	tcp_seq	iss;			/* initial send sequence number */
 	tcp_seq	irs;			/* initial receive sequence number */
 
 	tcp_seq	rcv_nxt;		/* receive next */
 	tcp_seq	rcv_adv;		/* advertised window */
 	u_long	rcv_wnd;		/* receive window */
 	tcp_seq	rcv_up;			/* receive urgent pointer */
 
 	u_long	snd_wnd;		/* send window */
 	u_long	snd_cwnd;		/* congestion-controlled window */
 	u_long	snd_spare1;		/* unused */
 	u_long	snd_ssthresh;		/* snd_cwnd size threshold for
 					 * slow start exponential to
 					 * linear switch
 					 */
 	u_long	snd_spare2;		/* unused */
 	tcp_seq	snd_recover;		/* for use in NewReno Fast Recovery */
 
-	u_int	t_maxopd;		/* mss plus options */
-
 	u_int	t_rcvtime;		/* inactivity time */
 	u_int	t_starttime;		/* time connection was established */
 	u_int	t_rtttime;		/* RTT measurement start time */
 	tcp_seq	t_rtseq;		/* sequence number being timed */
 
 	u_int	t_bw_spare1;		/* unused */
 	tcp_seq	t_bw_spare2;		/* unused */
 
 	int	t_rxtcur;		/* current retransmit value (ticks) */
 	u_int	t_maxseg;		/* maximum segment size */
+	u_int	t_pmtud_saved_maxseg;	/* pre-blackhole MSS */
 	int	t_srtt;			/* smoothed round-trip time */
 	int	t_rttvar;		/* variance in round-trip time */
 
 	int	t_rxtshift;		/* log(2) of rexmt exp. backoff */
 	u_int	t_rttmin;		/* minimum rtt allowed */
 	u_int	t_rttbest;		/* best rtt we've seen */
 	u_long	t_rttupdated;		/* number of times rtt sampled */
 	u_long	max_sndwnd;		/* largest window peer has offered */
 
 	int	t_softerror;		/* possible error not yet reported */
 /* out-of-band data */
 	char	t_oobflags;		/* have some */
 	char	t_iobc;			/* input character */
 /* RFC 1323 variables */
 	u_char	snd_scale;		/* window scaling for send window */
 	u_char	rcv_scale;		/* window scaling for recv window */
 	u_char	request_r_scale;	/* pending window scaling */
 	u_int32_t  ts_recent;		/* timestamp echo data */
 	u_int	ts_recent_age;		/* when last updated */
 	u_int32_t  ts_offset;		/* our timestamp offset */
 
 	tcp_seq	last_ack_sent;
 /* experimental */
 	u_long	snd_cwnd_prev;		/* cwnd prior to retransmit */
 	u_long	snd_ssthresh_prev;	/* ssthresh prior to retransmit */
 	tcp_seq	snd_recover_prev;	/* snd_recover prior to retransmit */
 	int	t_sndzerowin;		/* zero-window updates sent */
 	u_int	t_badrxtwin;		/* window for retransmit recovery */
 	u_char	snd_limited;		/* segments limited transmitted */
 /* SACK related state */
 	int	snd_numholes;		/* number of holes seen by sender */
 	TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
 					/* SACK scoreboard (sorted) */
 	tcp_seq	snd_fack;		/* last seq number(+1) sack'd by rcv'r*/
 	int	rcv_numsacks;		/* # distinct sack blks present */
 	struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
 	tcp_seq sack_newdata;		/* New data xmitted in this recovery
 					   episode starts at this seq number */
 	struct sackhint	sackhint;	/* SACK scoreboard hint */
 	int	t_rttlow;		/* smallest observed RTT */
 	u_int32_t	rfbuf_ts;	/* recv buffer autoscaling timestamp */
 	int	rfbuf_cnt;		/* recv buffer autoscaling byte count */
 	struct toedev	*tod;		/* toedev handling this connection */
 	int	t_sndrexmitpack;	/* retransmit packets sent */
 	int	t_rcvoopack;		/* out-of-order packets received */
 	void	*t_toe;			/* TOE pcb pointer */
 	int	t_bytes_acked;		/* # bytes acked during current RTT */
 	struct cc_algo	*cc_algo;	/* congestion control algorithm */
 	struct cc_var	*ccv;		/* congestion control specific vars */
 	struct osd	*osd;		/* storage for Khelp module data */
 
 	u_int	t_keepinit;		/* time to establish connection */
 	u_int	t_keepidle;		/* time before keepalive probes begin */
 	u_int	t_keepintvl;		/* interval between keepalives */
 	u_int	t_keepcnt;		/* number of keepalives before close */
 
 	u_int	t_tsomax;		/* TSO total burst length limit in bytes */
 	u_int	t_tsomaxsegcount;	/* TSO maximum segment count */
 	u_int	t_tsomaxsegsize;	/* TSO maximum segment size in bytes */
-	u_int	t_pmtud_saved_maxopd;	/* pre-blackhole MSS */
 	u_int	t_flags2;		/* More tcpcb flags storage */
 #if defined(_KERNEL) && defined(TCP_RFC7413)
 	uint32_t t_ispare[6];		/* 5 UTO, 1 TBD */
 	uint64_t t_tfo_cookie;		/* TCP Fast Open cookie */
 #else
 	uint32_t t_ispare[8];		/* 5 UTO, 3 TBD */
 #endif
 	struct tcp_function_block *t_fb;/* TCP function call block */
 	void	*t_fb_ptr;		/* Pointer to t_fb specific data */
 #if defined(_KERNEL) && defined(TCP_RFC7413)
 	unsigned int *t_tfo_pending;	/* TCP Fast Open pending counter */
 	void	*t_pspare2[1];		/* 1 TCP_SIGNATURE */
 #else
 	void	*t_pspare2[2];		/* 1 TCP_SIGNATURE, 1 TBD */
 #endif
 #if defined(_KERNEL) && defined(TCPPCAP)
 	struct mbufq t_inpkts;		/* List of saved input packets. */
 	struct mbufq t_outpkts;		/* List of saved output packets. */
 #ifdef _LP64
 	uint64_t _pad[0];		/* all used! */
 #else
 	uint64_t _pad[2];		/* 2 are available */
 #endif /* _LP64 */
 #else
 	uint64_t _pad[6];
 #endif /* defined(_KERNEL) && defined(TCPPCAP) */
 };
 
 /*
  * Flags and utility macros for the t_flags field.
  *
  * Bits 0x004000 and 0x008000 have no name assigned here.
  */
 #define	TF_ACKNOW	0x000001	/* ack peer immediately */
 #define	TF_DELACK	0x000002	/* ack, but try to delay it */
 #define	TF_NODELAY	0x000004	/* don't delay packets to coalesce */
 #define	TF_NOOPT	0x000008	/* don't use tcp options */
 #define	TF_SENTFIN	0x000010	/* have sent FIN */
 #define	TF_REQ_SCALE	0x000020	/* have/will request window scaling */
 #define	TF_RCVD_SCALE	0x000040	/* other side has requested scaling */
 #define	TF_REQ_TSTMP	0x000080	/* have/will request timestamps */
 #define	TF_RCVD_TSTMP	0x000100	/* a timestamp was received in SYN */
 #define	TF_SACK_PERMIT	0x000200	/* other side said I could SACK */
 #define	TF_NEEDSYN	0x000400	/* send SYN (implicit state) */
 #define	TF_NEEDFIN	0x000800	/* send FIN (implicit state) */
 #define	TF_NOPUSH	0x001000	/* don't push */
 #define	TF_PREVVALID	0x002000	/* saved values for bad rxmit valid */
 #define	TF_MORETOCOME	0x010000	/* More data to be appended to sock */
 #define	TF_LQ_OVERFLOW	0x020000	/* listen queue overflow */
 #define	TF_LASTIDLE	0x040000	/* connection was previously idle */
 #define	TF_RXWIN0SENT	0x080000	/* sent a receiver win 0 in response */
 #define	TF_FASTRECOVERY	0x100000	/* in NewReno Fast Recovery */
 #define	TF_WASFRECOVERY	0x200000	/* was in NewReno Fast Recovery */
 #define	TF_SIGNATURE	0x400000	/* require MD5 digests (RFC2385) */
 #define	TF_FORCEDATA	0x800000	/* force out a byte */
 #define	TF_TSO		0x1000000	/* TSO enabled on this connection */
 #define	TF_TOE		0x2000000	/* this connection is offloaded */
 #define	TF_ECN_PERMIT	0x4000000	/* connection ECN-ready */
 #define	TF_ECN_SND_CWR	0x8000000	/* ECN CWR in queue */
 #define	TF_ECN_SND_ECE	0x10000000	/* ECN ECE in queue */
 #define	TF_CONGRECOVERY	0x20000000	/* congestion recovery mode */
 #define	TF_WASCRECOVERY	0x40000000	/* was in congestion recovery */
 #define	TF_FASTOPEN	0x80000000	/* TCP Fast Open indication */
 
 /*
  * Test, set and clear the recovery bits.  The argument is the flags
  * lvalue itself (e.g. tp->t_flags), not the tcpcb.  IN_* yield a non-zero
  * value when the bit(s) are set; ENTER_* and EXIT_* modify the argument in
  * place.  Arguments and expansions are fully parenthesized so that any
  * expression may be passed and the result used inside a larger expression.
  */
 #define	IN_FASTRECOVERY(t_flags)	((t_flags) & TF_FASTRECOVERY)
 #define	ENTER_FASTRECOVERY(t_flags)	((t_flags) |= TF_FASTRECOVERY)
 #define	EXIT_FASTRECOVERY(t_flags)	((t_flags) &= ~TF_FASTRECOVERY)
 
 #define	IN_CONGRECOVERY(t_flags)	((t_flags) & TF_CONGRECOVERY)
 #define	ENTER_CONGRECOVERY(t_flags)	((t_flags) |= TF_CONGRECOVERY)
 #define	EXIT_CONGRECOVERY(t_flags)	((t_flags) &= ~TF_CONGRECOVERY)
 
 #define	IN_RECOVERY(t_flags) ((t_flags) & (TF_CONGRECOVERY | TF_FASTRECOVERY))
 #define	ENTER_RECOVERY(t_flags) ((t_flags) |= (TF_CONGRECOVERY | TF_FASTRECOVERY))
 #define	EXIT_RECOVERY(t_flags) ((t_flags) &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY))
 
 /* Difference between the ACK number in th and tp's snd_una. */
 #define	BYTES_THIS_ACK(tp, th)	((th)->th_ack - (tp)->snd_una)
 
 /*
  * Flags for the t_oobflags field.
  */
 #define	TCPOOB_HAVEDATA	0x01
 #define	TCPOOB_HADDATA	0x02
 
 #ifdef TCP_SIGNATURE
 /*
  * Defines which are needed by the xform_tcp module and tcp_[in|out]put
  * for SADB verification and lookup.
  */
 #define	TCP_SIGLEN	16	/* length of computed digest in bytes */
 #define	TCP_KEYLEN_MIN	1	/* minimum length of TCP-MD5 key */
 #define	TCP_KEYLEN_MAX	80	/* maximum length of TCP-MD5 key */
 /*
  * Only a single SA per host may be specified at this time. An SPI is
  * needed in order for the KEY_ALLOCSA() lookup to work.
  */
 #define	TCP_SIG_SPI	0x1000
 #endif /* TCP_SIGNATURE */
 
 /*
  * Flags for PLPMTU handling, t_flags2
  * (the "more tcpcb flags" word of struct tcpcb).
  */
 #define	TF2_PLPMTU_BLACKHOLE	0x00000001 /* Possible PLPMTUD Black Hole. */
 #define	TF2_PLPMTU_PMTUD	0x00000002 /* Allowed to attempt PLPMTUD. */
 #define	TF2_PLPMTU_MAXSEGSNT	0x00000004 /* Last seg sent was full seg. */
 
 /*
  * Structure to hold TCP options that are only used during segment
  * processing (in tcp_input), but not held in the tcpcb.
  * It's basically used to reduce the number of parameters
  * to tcp_dooptions and tcp_addoptions.
  * The binary order of the to_flags is relevant for packing of the
  * options in tcp_addoptions.
  *
  * The TOF_* values are the bits of to_flags; 0x0008 and 0x0020 have no
  * name assigned here.
  */
 struct tcpopt {
 	u_int64_t	to_flags;	/* which options are present */
 #define	TOF_MSS		0x0001		/* maximum segment size */
 #define	TOF_SCALE	0x0002		/* window scaling */
 #define	TOF_SACKPERM	0x0004		/* SACK permitted */
 #define	TOF_TS		0x0010		/* timestamp */
 #define	TOF_SIGNATURE	0x0040		/* TCP-MD5 signature option (RFC2385) */
 #define	TOF_SACK	0x0080		/* Peer sent SACK option */
 #define	TOF_FASTOPEN	0x0100		/* TCP Fast Open (TFO) cookie */
 #define	TOF_MAXOPT	0x0200		/* one past the highest option bit */
 	u_int32_t	to_tsval;	/* new timestamp */
 	u_int32_t	to_tsecr;	/* reflected timestamp */
 	u_char		*to_sacks;	/* pointer to the first SACK blocks */
 	u_char		*to_signature;	/* pointer to the TCP-MD5 signature */
 	u_char		*to_tfo_cookie; /* pointer to the TFO cookie */
 	u_int16_t	to_mss;		/* maximum segment size */
 	u_int8_t	to_wscale;	/* window scaling */
 	u_int8_t	to_nsacks;	/* number of SACK blocks */
 	u_int8_t	to_tfo_len;	/* TFO cookie length */
 	u_int32_t	to_spare;	/* UTO */
 };
 
 /*
  * Flags for tcp_dooptions.
  */
 #define	TO_SYN		0x01		/* parse SYN-only options */
 
 /*
  * Per-path metrics.  The member list mirrors part of struct hc_metrics,
  * which is declared elsewhere; keep the two in step when either changes.
  */
 struct hc_metrics_lite {	/* must stay in sync with hc_metrics */
 	u_long	rmx_mtu;	/* MTU for this path */
 	u_long	rmx_ssthresh;	/* outbound gateway buffer limit */
 	u_long	rmx_rtt;	/* estimated round trip time */
 	u_long	rmx_rttvar;	/* estimated rtt variance */
 	u_long	rmx_cwnd;	/* congestion window */
 	u_long	rmx_sendpipe;   /* outbound delay-bandwidth product */
 	u_long	rmx_recvpipe;   /* inbound delay-bandwidth product */
 };
 
 /*
  * Used by tcp_maxmtu() to communicate interface specific features
  * and limits at the time of connection setup.
  *
  * The tso* members have the same names (minus the t_ prefix) as the
  * tcpcb's t_tsomax, t_tsomaxsegcount and t_tsomaxsegsize limits.
  */
 struct tcp_ifcap {
 	int	ifcap;		/* interface capability bits -- presumably
 				 * IFCAP_* values; confirm */
 	u_int	tsomax;		/* cf. tcpcb t_tsomax */
 	u_int	tsomaxsegcount;	/* cf. tcpcb t_tsomaxsegcount */
 	u_int	tsomaxsegsize;	/* cf. tcpcb t_tsomaxsegsize */
 };
 
 #ifndef _NETINET_IN_PCB_H_
 struct in_conninfo;
 #endif /* _NETINET_IN_PCB_H_ */
 
 /*
  * Reduced per-connection state; like a tcpcb it is reached through the
  * inpcb's inp_ppcb pointer (see intotw()).
  * NOTE(review): presumably used for connections in TIME_WAIT (cf. the
  * tw_2msl queue linkage) -- confirm.
  */
 struct tcptw {
 	struct inpcb	*tw_inpcb;	/* XXX back pointer to internet pcb */
 	tcp_seq		snd_nxt;
 	tcp_seq		rcv_nxt;
 	tcp_seq		iss;
 	tcp_seq		irs;
 	u_short		last_win;	/* cached window value */
 	u_short		tw_so_options;	/* copy of so_options */
 	struct ucred	*tw_cred;	/* user credentials */
 	u_int32_t	t_recent;
 	u_int32_t	ts_offset;	/* our timestamp offset */
 	u_int		t_starttime;
 	int		tw_time;
 	TAILQ_ENTRY(tcptw) tw_2msl;	/* queue linkage */
 	void		*tw_pspare;	/* TCP_SIGNATURE */
 	u_int		*tw_spare;	/* TCP_SIGNATURE */
 };
 
 /*
  * Pointer conversions: intotcpcb() and intotw() view an inpcb's inp_ppcb
  * pointer as a tcpcb or a tcptw respectively (plain casts, no checking);
  * sototcpcb() goes from a socket to its inpcb and then to the tcpcb.
  */
 #define	intotcpcb(ip)	((struct tcpcb *)(ip)->inp_ppcb)
 #define	intotw(ip)	((struct tcptw *)(ip)->inp_ppcb)
 #define	sototcpcb(so)	(intotcpcb(sotoinpcb(so)))
 
 /*
  * The smoothed round-trip time and estimated variance
  * are stored as fixed point numbers scaled by the values below.
  * For convenience, these scales are also used in smoothing the average
  * (smoothed = (1/scale)sample + ((scale-1)/scale)smoothed).
  * With the values defined below, srtt is scaled by 32 (5 bits to the
  * right of the binary point) and rttvar by 16 (4 bits).
  *
  * NOTE(review): this comment and the per-define comments historically
  * spoke of 3 fractional bits for srtt (an "ALPHA" of 0.875) and 2 for
  * rttvar (an ALPHA of 0.75), which does not match the 5- and 4-bit
  * shifts defined here; confirm the intended smoothing gains against the
  * users of TCP_DELTA_SHIFT in tcp_input.c.
  */
 #define	TCP_RTT_SCALE		32	/* multiplier for srtt; 5 bits frac. */
 #define	TCP_RTT_SHIFT		5	/* shift for srtt; 5 bits frac. */
 #define	TCP_RTTVAR_SCALE	16	/* multiplier for rttvar; 4 bits */
 #define	TCP_RTTVAR_SHIFT	4	/* shift for rttvar; 4 bits */
 #define	TCP_DELTA_SHIFT		2	/* see tcp_input.c */
 
 /*
  * The initial retransmission should happen at rtt + 4 * rttvar.
  * Because of the way we do the smoothing, srtt and rttvar
  * will each average +1/2 tick of bias.  When we compute
  * the retransmit timer, we want 1/2 tick of rounding and
  * 1 extra tick because of +-1/2 tick uncertainty in the
  * firing of the timer.  The bias will give us exactly the
  * 1.5 tick we need.  But, because the bias is
  * statistical, we have to test that we don't drop below
  * the minimum feasible timer (which is 2 ticks).
  * This version of the macro adapted from a paper by Lawrence
  * Brakmo and Larry Peterson which outlines a problem caused
  * by insufficient precision in the original implementation,
  * which results in inappropriately large RTO values for very
  * fast networks.
  *
  * The result is the larger of tp->t_rttmin and the value computed from
  * t_srtt and t_rttvar.  The tp argument is expanded three times, so pass
  * an expression without side effects.
  */
 #define	TCP_REXMTVAL(tp) \
 	max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT))  \
 	  + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
 
 /*
  * TCP statistics.
  * Many of these should be kept per connection,
  * but that's inconvenient at the moment.
  */
 struct	tcpstat {
 	uint64_t tcps_connattempt;	/* connections initiated */
 	uint64_t tcps_accepts;		/* connections accepted */
 	uint64_t tcps_connects;		/* connections established */
 	uint64_t tcps_drops;		/* connections dropped */
 	uint64_t tcps_conndrops;	/* embryonic connections dropped */
 	uint64_t tcps_minmssdrops;	/* average minmss too low drops */
 	uint64_t tcps_closed;		/* conn. closed (includes drops) */
 	uint64_t tcps_segstimed;	/* segs where we tried to get rtt */
 	uint64_t tcps_rttupdated;	/* times we succeeded */
 	uint64_t tcps_delack;		/* delayed acks sent */
 	uint64_t tcps_timeoutdrop;	/* conn. dropped in rxmt timeout */
 	uint64_t tcps_rexmttimeo;	/* retransmit timeouts */
 	uint64_t tcps_persisttimeo;	/* persist timeouts */
 	uint64_t tcps_keeptimeo;	/* keepalive timeouts */
 	uint64_t tcps_keepprobe;	/* keepalive probes sent */
 	uint64_t tcps_keepdrops;	/* connections dropped in keepalive */
 
 	uint64_t tcps_sndtotal;		/* total packets sent */
 	uint64_t tcps_sndpack;		/* data packets sent */
 	uint64_t tcps_sndbyte;		/* data bytes sent */
 	uint64_t tcps_sndrexmitpack;	/* data packets retransmitted */
 	uint64_t tcps_sndrexmitbyte;	/* data bytes retransmitted */
 	uint64_t tcps_sndrexmitbad;	/* unnecessary packet retransmissions */
 	uint64_t tcps_sndacks;		/* ack-only packets sent */
 	uint64_t tcps_sndprobe;		/* window probes sent */
 	uint64_t tcps_sndurg;		/* packets sent with URG only */
 	uint64_t tcps_sndwinup;		/* window update-only packets sent */
 	uint64_t tcps_sndctrl;		/* control (SYN|FIN|RST) packets sent */
 
 	uint64_t tcps_rcvtotal;		/* total packets received */
 	uint64_t tcps_rcvpack;		/* packets received in sequence */
 	uint64_t tcps_rcvbyte;		/* bytes received in sequence */
 	uint64_t tcps_rcvbadsum;	/* packets received with cksum errs */
 	uint64_t tcps_rcvbadoff;	/* packets received with bad offset */
 	uint64_t tcps_rcvreassfull;	/* packets dropped for no reass space */
 	uint64_t tcps_rcvshort;		/* packets received too short */
 	uint64_t tcps_rcvduppack;	/* duplicate-only packets received */
 	uint64_t tcps_rcvdupbyte;	/* duplicate-only bytes received */
 	uint64_t tcps_rcvpartduppack;	/* packets with some duplicate data */
 	uint64_t tcps_rcvpartdupbyte;	/* dup. bytes in part-dup. packets */
 	uint64_t tcps_rcvoopack;	/* out-of-order packets received */
 	uint64_t tcps_rcvoobyte;	/* out-of-order bytes received */
 	uint64_t tcps_rcvpackafterwin;	/* packets with data after window */
 	uint64_t tcps_rcvbyteafterwin;	/* bytes rcvd after window */
 	uint64_t tcps_rcvafterclose;	/* packets rcvd after "close" */
 	uint64_t tcps_rcvwinprobe;	/* rcvd window probe packets */
 	uint64_t tcps_rcvdupack;	/* rcvd duplicate acks */
 	uint64_t tcps_rcvacktoomuch;	/* rcvd acks for unsent data */
 	uint64_t tcps_rcvackpack;	/* rcvd ack packets */
 	uint64_t tcps_rcvackbyte;	/* bytes acked by rcvd acks */
 	uint64_t tcps_rcvwinupd;	/* rcvd window update packets */
 	uint64_t tcps_pawsdrop;		/* segments dropped due to PAWS */
 	uint64_t tcps_predack;		/* times hdr predict ok for acks */
 	uint64_t tcps_preddat;		/* times hdr predict ok for data pkts */
 	uint64_t tcps_pcbcachemiss;	/* NOTE(review): undocumented; presumably PCB cache misses -- confirm */
 	uint64_t tcps_cachedrtt;	/* times cached RTT in route updated */
 	uint64_t tcps_cachedrttvar;	/* times cached rttvar updated */
 	uint64_t tcps_cachedssthresh;	/* times cached ssthresh updated */
 	uint64_t tcps_usedrtt;		/* times RTT initialized from route */
 	uint64_t tcps_usedrttvar;	/* times RTTVAR initialized from rt */
 	uint64_t tcps_usedssthresh;	/* times ssthresh initialized from rt*/
 	uint64_t tcps_persistdrop;	/* timeout in persist state */
 	uint64_t tcps_badsyn;		/* bogus SYN, e.g. premature ACK */
 	uint64_t tcps_mturesent;	/* resends due to MTU discovery */
 	uint64_t tcps_listendrop;	/* listen queue overflows */
 	uint64_t tcps_badrst;		/* ignored RSTs in the window */
 
 	uint64_t tcps_sc_added;		/* entry added to syncache */
 	uint64_t tcps_sc_retransmitted;	/* syncache entry was retransmitted */
 	uint64_t tcps_sc_dupsyn;	/* duplicate SYN packet */
 	uint64_t tcps_sc_dropped;	/* could not reply to packet */
 	uint64_t tcps_sc_completed;	/* successful extraction of entry */
 	uint64_t tcps_sc_bucketoverflow;/* syncache per-bucket limit hit */
 	uint64_t tcps_sc_cacheoverflow;	/* syncache cache limit hit */
 	uint64_t tcps_sc_reset;		/* RST removed entry from syncache */
 	uint64_t tcps_sc_stale;		/* timed out or listen socket gone */
 	uint64_t tcps_sc_aborted;	/* syncache entry aborted */
 	uint64_t tcps_sc_badack;	/* removed due to bad ACK */
 	uint64_t tcps_sc_unreach;	/* ICMP unreachable received */
 	uint64_t tcps_sc_zonefail;	/* zalloc() failed */
 	uint64_t tcps_sc_sendcookie;	/* SYN cookie sent */
 	uint64_t tcps_sc_recvcookie;	/* SYN cookie received */
 
 	uint64_t tcps_hc_added;		/* entry added to hostcache */
 	uint64_t tcps_hc_bucketoverflow;/* hostcache per bucket limit hit */
 
 	uint64_t tcps_finwait2_drops;    /* Drop FIN_WAIT_2 connection after time limit */
 
 	/* SACK related stats */
 	uint64_t tcps_sack_recovery_episode; /* SACK recovery episodes */
 	uint64_t tcps_sack_rexmits;	    /* SACK rexmit segments   */
 	uint64_t tcps_sack_rexmit_bytes;    /* SACK rexmit bytes      */
 	uint64_t tcps_sack_rcv_blocks;	    /* SACK blocks (options) received */
 	uint64_t tcps_sack_send_blocks;	    /* SACK blocks (options) sent     */
 	uint64_t tcps_sack_sboverflow;	    /* times scoreboard overflowed */
 	
 	/* ECN related stats */
 	uint64_t tcps_ecn_ce;		/* ECN Congestion Experienced */
 	uint64_t tcps_ecn_ect0;		/* ECN Capable Transport */
 	uint64_t tcps_ecn_ect1;		/* ECN Capable Transport */
 	uint64_t tcps_ecn_shs;		/* ECN successful handshakes */
 	uint64_t tcps_ecn_rcwnd;	/* # times ECN reduced the cwnd */
 
 	/* TCP_SIGNATURE related stats */
 	uint64_t tcps_sig_rcvgoodsig;	/* Total matching signature received */
 	uint64_t tcps_sig_rcvbadsig;	/* Total bad signature received */
 	uint64_t tcps_sig_err_buildsig;	/* Mismatching signature received */
 	uint64_t tcps_sig_err_sigopt;	/* No signature expected by socket */
 	uint64_t tcps_sig_err_nosigopt;	/* No signature provided by segment */
 
 	uint64_t _pad[12];		/* 6 UTO, 6 TBD */
 };
 
 #define	tcps_rcvmemdrop	tcps_rcvreassfull	/* compat */
 
 #ifdef _KERNEL
 #define	TI_UNLOCKED	1
 #define	TI_RLOCKED	2
 #include <sys/counter.h>
 
 VNET_PCPUSTAT_DECLARE(struct tcpstat, tcpstat);	/* tcp statistics */
 /*
  * In-kernel consumers can use these accessor macros directly to update
  * stats.
  */
 #define	TCPSTAT_ADD(name, val)	\
     VNET_PCPUSTAT_ADD(struct tcpstat, tcpstat, name, (val))
 #define	TCPSTAT_INC(name)	TCPSTAT_ADD(name, 1)
 
 /*
  * Kernel module consumers must use this accessor macro.
  */
 void	kmod_tcpstat_inc(int statnum);
 #define	KMOD_TCPSTAT_INC(name)						\
     kmod_tcpstat_inc(offsetof(struct tcpstat, name) / sizeof(uint64_t))
 
 /*
  * TCP specific helper hook point identifiers.
  */
 #define	HHOOK_TCP_EST_IN		0
 #define	HHOOK_TCP_EST_OUT		1
 #define	HHOOK_TCP_LAST			HHOOK_TCP_EST_OUT
 
 /*
  * Bundle of per-segment context grouped into one structure.
  * NOTE(review): presumably this is the argument handed to the TCP helper
  * hooks identified by HHOOK_TCP_EST_IN/OUT; the field notes below are
  * inferred from names and types only -- confirm against the call sites.
  */
 struct tcp_hhook_data {
 	struct tcpcb	*tp;		/* presumably the connection's control block */
 	struct tcphdr	*th;		/* presumably the TCP header being processed */
 	struct tcpopt	*to;		/* presumably the parsed TCP options */
 	long		len;		/* presumably a segment/data length -- confirm */
 	int		tso;		/* presumably a TSO indicator -- confirm */
 	tcp_seq		curack;		/* presumably the current ACK sequence number */
 };
 #endif
 
 /*
  * TCB structure exported to user-land via sysctl(3).
  * Evil hack: declare only if in_pcb.h and sys/socketvar.h have been
  * included.  Not all of our clients do.
  */
 #if defined(_NETINET_IN_PCB_H_) && defined(_SYS_SOCKETVAR_H_)
 /*
  * Timer values for one connection; embedded in struct xtcpcb as xt_timer.
  * NOTE(review): the time unit of these ints is not visible here -- confirm
  * against the code that fills the structure.
  */
 struct xtcp_timer {
 	int tt_rexmt;	/* retransmit timer */
 	int tt_persist;	/* retransmit persistence */
 	int tt_keep;	/* keepalive */
 	int tt_2msl;	/* 2*msl TIME_WAIT timer */
 	int tt_delack;	/* delayed ACK timer */
 	int t_rcvtime;	/* Time since last packet received */
 };
 /*
  * One exported TCP connection record: embeds copies of the inpcb, the
  * tcpcb, the xsocket and the timer values in a single structure.
  */
 struct	xtcpcb {
 	size_t	xt_len;			/* presumably sizeof this record -- confirm */
 	struct	inpcb	xt_inp;		/* embedded Internet PCB */
 	struct	tcpcb	xt_tp;		/* embedded TCP control block */
 	struct	xsocket	xt_socket;	/* embedded socket information */
 	struct	xtcp_timer xt_timer;	/* embedded timer values */
 	u_quad_t	xt_alignment_hack;	/* per its name, present only to influence alignment */
 };
 #endif
 
 /*
  * Identifiers for TCP sysctl nodes
  */
 #define	TCPCTL_DO_RFC1323	1	/* use RFC-1323 extensions */
 #define	TCPCTL_MSSDFLT		3	/* MSS default */
 #define TCPCTL_STATS		4	/* statistics (read-only) */
 #define	TCPCTL_RTTDFLT		5	/* default RTT estimate */
 #define	TCPCTL_KEEPIDLE		6	/* keepalive idle timer */
 #define	TCPCTL_KEEPINTVL	7	/* interval to send keepalives */
 #define	TCPCTL_SENDSPACE	8	/* send buffer space */
 #define	TCPCTL_RECVSPACE	9	/* receive buffer space */
 #define	TCPCTL_KEEPINIT		10	/* timeout for establishing syn */
 #define	TCPCTL_PCBLIST		11	/* list of all outstanding PCBs */
 #define	TCPCTL_DELACKTIME	12	/* time before sending delayed ACK */
 #define	TCPCTL_V6MSSDFLT	13	/* MSS default for IPv6 */
 #define	TCPCTL_SACK		14	/* Selective Acknowledgement,rfc 2018 */
 #define	TCPCTL_DROP		15	/* drop tcp connection */
 
 #ifdef _KERNEL
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet_tcp);
 SYSCTL_DECL(_net_inet_tcp_sack);
 MALLOC_DECLARE(M_TCPLOG);
 #endif
 
 VNET_DECLARE(struct inpcbhead, tcb);		/* queue of active tcpcb's */
 VNET_DECLARE(struct inpcbinfo, tcbinfo);
 extern	int tcp_log_in_vain;
 VNET_DECLARE(int, tcp_mssdflt);	/* XXX */
 VNET_DECLARE(int, tcp_minmss);
 VNET_DECLARE(int, tcp_delack_enabled);
 VNET_DECLARE(int, tcp_do_rfc3390);
 VNET_DECLARE(int, tcp_initcwnd_segments);
 VNET_DECLARE(int, tcp_sendspace);
 VNET_DECLARE(int, tcp_recvspace);
 VNET_DECLARE(int, path_mtu_discovery);
 VNET_DECLARE(int, tcp_do_rfc3465);
 VNET_DECLARE(int, tcp_abc_l_var);
 #define	V_tcb			VNET(tcb)
 #define	V_tcbinfo		VNET(tcbinfo)
 #define	V_tcp_mssdflt		VNET(tcp_mssdflt)
 #define	V_tcp_minmss		VNET(tcp_minmss)
 #define	V_tcp_delack_enabled	VNET(tcp_delack_enabled)
 #define	V_tcp_do_rfc3390	VNET(tcp_do_rfc3390)
 #define	V_tcp_initcwnd_segments	VNET(tcp_initcwnd_segments)
 #define	V_tcp_sendspace		VNET(tcp_sendspace)
 #define	V_tcp_recvspace		VNET(tcp_recvspace)
 #define	V_path_mtu_discovery	VNET(path_mtu_discovery)
 #define	V_tcp_do_rfc3465	VNET(tcp_do_rfc3465)
 #define	V_tcp_abc_l_var		VNET(tcp_abc_l_var)
 
 VNET_DECLARE(int, tcp_do_sack);			/* SACK enabled/disabled */
 VNET_DECLARE(int, tcp_sc_rst_sock_fail);	/* RST on sock alloc failure */
 #define	V_tcp_do_sack		VNET(tcp_do_sack)
 #define	V_tcp_sc_rst_sock_fail	VNET(tcp_sc_rst_sock_fail)
 
 VNET_DECLARE(int, tcp_do_ecn);			/* TCP ECN enabled/disabled */
 VNET_DECLARE(int, tcp_ecn_maxretries);
 #define	V_tcp_do_ecn		VNET(tcp_do_ecn)
 #define	V_tcp_ecn_maxretries	VNET(tcp_ecn_maxretries)
 
 VNET_DECLARE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST + 1]);
 #define	V_tcp_hhh		VNET(tcp_hhh)
 
 VNET_DECLARE(int, tcp_do_rfc6675_pipe);
 #define V_tcp_do_rfc6675_pipe	VNET(tcp_do_rfc6675_pipe)
 
 int	 tcp_addoptions(struct tcpopt *, u_char *);
 int	 tcp_ccalgounload(struct cc_algo *unload_algo);
 struct tcpcb *
 	 tcp_close(struct tcpcb *);
 void	 tcp_discardcb(struct tcpcb *);
 void	 tcp_twstart(struct tcpcb *);
 void	 tcp_twclose(struct tcptw *, int);
 void	 tcp_ctlinput(int, struct sockaddr *, void *);
 int	 tcp_ctloutput(struct socket *, struct sockopt *);
 struct tcpcb *
 	 tcp_drop(struct tcpcb *, int);
 void	 tcp_drain(void);
 void	 tcp_init(void);
 #ifdef VIMAGE
 void	 tcp_destroy(void);
 #endif
 void	 tcp_fini(void *);
 char	*tcp_log_addrs(struct in_conninfo *, struct tcphdr *, void *,
 	    const void *);
 char	*tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
 	    const void *);
 int	 tcp_reass(struct tcpcb *, struct tcphdr *, int *, struct mbuf *);
 void	 tcp_reass_global_init(void);
 void	 tcp_reass_flush(struct tcpcb *);
 void	 tcp_dooptions(struct tcpopt *, u_char *, int, int);
 void	tcp_dropwithreset(struct mbuf *, struct tcphdr *,
 		     struct tcpcb *, int, int);
 void	tcp_pulloutofband(struct socket *,
 		     struct tcphdr *, struct mbuf *, int);
 void	tcp_xmit_timer(struct tcpcb *, int);
 void	tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
 void	cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
 			    uint16_t type);
 void 	cc_conn_init(struct tcpcb *tp);
 void 	cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
 void	cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
 void	hhook_run_tcp_est_in(struct tcpcb *tp,
 			    struct tcphdr *th, struct tcpopt *to);
 
 int	 tcp_input(struct mbuf **, int *, int);
 void	 tcp_do_segment(struct mbuf *, struct tcphdr *,
 			struct socket *, struct tcpcb *, int, int, uint8_t,
 			int);
 
 int register_tcp_functions(struct tcp_function_block *blk, int wait);
 int deregister_tcp_functions(struct tcp_function_block *blk);
 struct tcp_function_block *find_and_ref_tcp_functions(struct tcp_function_set *fs);
 struct tcp_function_block *find_and_ref_tcp_fb(struct tcp_function_block *blk);
 int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp);
 
 u_long	 tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
 u_long	 tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
+u_int	 tcp_maxseg(const struct tcpcb *);
 void	 tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
 	    struct tcp_ifcap *);
 void	 tcp_mss(struct tcpcb *, int);
 int	 tcp_mssopt(struct in_conninfo *);
 struct inpcb *
 	 tcp_drop_syn_sent(struct inpcb *, int);
 struct tcpcb *
 	 tcp_newtcpcb(struct inpcb *);
 int	 tcp_output(struct tcpcb *);
 void	 tcp_state_change(struct tcpcb *, int);
 void	 tcp_respond(struct tcpcb *, void *,
 	    struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
 void	 tcp_tw_init(void);
 #ifdef VIMAGE
 void	 tcp_tw_destroy(void);
 #endif
 void	 tcp_tw_zone_change(void);
 int	 tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *,
 	    struct mbuf *, int);
 void	 tcp_setpersist(struct tcpcb *);
 #ifdef TCP_SIGNATURE
 struct secasvar;
 struct secasvar *tcp_get_sav(struct mbuf *, u_int);
 int	 tcp_signature_do_compute(struct mbuf *, int, int, u_char *,
 	    struct secasvar *);
 int	 tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int);
 int	 tcp_signature_verify(struct mbuf *, int, int, int, struct tcpopt *,
 	    struct tcphdr *, u_int);
 int	tcp_signature_check(struct mbuf *m, int off0, int tlen, int optlen,
 	    struct tcpopt *to, struct tcphdr *th, u_int tcpbflag);
 #endif
 void	 tcp_slowtimo(void);
 struct tcptemp *
 	 tcpip_maketemplate(struct inpcb *);
 void	 tcpip_fillheaders(struct inpcb *, void *, void *);
 void	 tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
 int	 tcp_timer_active(struct tcpcb *, uint32_t);
 void	 tcp_timer_stop(struct tcpcb *, uint32_t);
 void	 tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
 /*
  * All tcp_hc_* functions are IPv4 and IPv6 (via in_conninfo)
  */
 void	 tcp_hc_init(void);
 #ifdef VIMAGE
 void	 tcp_hc_destroy(void);
 #endif
 void	 tcp_hc_get(struct in_conninfo *, struct hc_metrics_lite *);
 u_long	 tcp_hc_getmtu(struct in_conninfo *);
 void	 tcp_hc_updatemtu(struct in_conninfo *, u_long);
 void	 tcp_hc_update(struct in_conninfo *, struct hc_metrics_lite *);
 
 extern	struct pr_usrreqs tcp_usrreqs;
 tcp_seq tcp_new_isn(struct tcpcb *);
 
 int	 tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
 void	 tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
 void	 tcp_clean_sackreport(struct tcpcb *tp);
 void	 tcp_sack_adjust(struct tcpcb *tp);
 struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
 void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void	 tcp_free_sackholes(struct tcpcb *tp);
 int	 tcp_newreno(struct tcpcb *, struct tcphdr *);
 u_long	 tcp_seq_subtract(u_long, u_long );
 int	 tcp_compute_pipe(struct tcpcb *);
 
 /*
  * Convert the sequence, acknowledgement, window and urgent-pointer fields
  * of a TCP header from network to host byte order, in place.
  */
 static inline void
 tcp_fields_to_host(struct tcphdr *th)
 {
 	/* 16-bit fields. */
 	th->th_urp = ntohs(th->th_urp);
 	th->th_win = ntohs(th->th_win);
 
 	/* 32-bit fields. */
 	th->th_ack = ntohl(th->th_ack);
 	th->th_seq = ntohl(th->th_seq);
 }
 
 #ifdef TCP_SIGNATURE
 /*
  * Convert the sequence, acknowledgement, window and urgent-pointer fields
  * of a TCP header from host to network byte order, in place.
  */
 static inline void
 tcp_fields_to_net(struct tcphdr *th)
 {
 	/* 16-bit fields. */
 	th->th_urp = htons(th->th_urp);
 	th->th_win = htons(th->th_win);
 
 	/* 32-bit fields. */
 	th->th_ack = htonl(th->th_ack);
 	th->th_seq = htonl(th->th_seq);
 }
 #endif
 #endif /* _KERNEL */
 
 #endif /* _NETINET_TCP_VAR_H_ */
Index: projects/release-pkg/sys/netpfil/pf/pf.c
===================================================================
--- projects/release-pkg/sys/netpfil/pf/pf.c	(revision 293335)
+++ projects/release-pkg/sys/netpfil/pf/pf.c	(revision 293336)
@@ -1,6526 +1,6573 @@
 /*-
  * Copyright (c) 2001 Daniel Hartmeier
  * Copyright (c) 2002 - 2008 Henning Brauer
  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  *    - Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *    - Redistributions in binary form must reproduce the above
  *      copyright notice, this list of conditions and the following
  *      disclaimer in the documentation and/or other materials provided
  *      with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * Effort sponsored in part by the Defense Advanced Research Projects
  * Agency (DARPA) and Air Force Research Laboratory, Air Force
  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
  *
  *	$OpenBSD: pf.c,v 1.634 2009/02/27 12:37:45 henning Exp $
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_bpf.h"
 #include "opt_pf.h"
 
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/endian.h>
 #include <sys/hash.h>
 #include <sys/interrupt.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/limits.h>
 #include <sys/mbuf.h>
 #include <sys/md5.h>
 #include <sys/random.h>
 #include <sys/refcount.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/ucred.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 #include <net/route.h>
 #include <net/radix_mpath.h>
 #include <net/vnet.h>
 
 #include <net/pfvar.h>
 #include <net/if_pflog.h>
 #include <net/if_pfsync.h>
 
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
+#include <netinet/in_fib.h>
 #include <netinet/ip.h>
 #include <netinet/ip_fw.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/icmp_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #include <netpfil/ipfw/ip_fw_private.h> /* XXX: only for DIR_IN/DIR_OUT */
 
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
+#include <netinet6/in6_fib.h>
+#include <netinet6/scope6_var.h>
 #endif /* INET6 */
 
 #include <machine/in_cksum.h>
 #include <security/mac/mac_framework.h>
 
 #define	DPFPRINTF(n, x)	if (V_pf_status.debug >= (n)) printf x
 
 /*
  * Global variables
  */
 
 /* state tables */
 VNET_DEFINE(struct pf_altqqueue,	 pf_altqs[2]);
 VNET_DEFINE(struct pf_palist,		 pf_pabuf);
 VNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_active);
 VNET_DEFINE(struct pf_altqqueue *,	 pf_altqs_inactive);
 VNET_DEFINE(struct pf_kstatus,		 pf_status);
 
 VNET_DEFINE(u_int32_t,			 ticket_altqs_active);
 VNET_DEFINE(u_int32_t,			 ticket_altqs_inactive);
 VNET_DEFINE(int,			 altqs_inactive_open);
 VNET_DEFINE(u_int32_t,			 ticket_pabuf);
 
 VNET_DEFINE(MD5_CTX,			 pf_tcp_secret_ctx);
 #define	V_pf_tcp_secret_ctx		 VNET(pf_tcp_secret_ctx)
 VNET_DEFINE(u_char,			 pf_tcp_secret[16]);
 #define	V_pf_tcp_secret			 VNET(pf_tcp_secret)
 VNET_DEFINE(int,			 pf_tcp_secret_init);
 #define	V_pf_tcp_secret_init		 VNET(pf_tcp_secret_init)
 VNET_DEFINE(int,			 pf_tcp_iss_off);
 #define	V_pf_tcp_iss_off		 VNET(pf_tcp_iss_off)
 
 /*
  * Queue for pf_intr() sends.
  */
 static MALLOC_DEFINE(M_PFTEMP, "pf_temp", "pf(4) temporary allocations");
 /* One queued packet on the pf send queue (see V_pf_sendqueue). */
 struct pf_send_entry {
 	STAILQ_ENTRY(pf_send_entry)	pfse_next;	/* tail-queue linkage */
 	struct mbuf			*pfse_m;	/* the queued mbuf */
 	/* Kind of send requested for pfse_m. */
 	enum {
 		PFSE_IP,
 		PFSE_IP6,
 		PFSE_ICMP,
 		PFSE_ICMP6,
 	}				pfse_type;
 	/*
 	 * NOTE(review): presumably only meaningful for the PFSE_ICMP and
 	 * PFSE_ICMP6 types -- confirm against the consumer of the queue.
 	 */
 	struct {
 		int		type;
 		int		code;
 		int		mtu;
 	} icmpopts;
 };
 
 STAILQ_HEAD(pf_send_head, pf_send_entry);
 static VNET_DEFINE(struct pf_send_head, pf_sendqueue);
 #define	V_pf_sendqueue	VNET(pf_sendqueue)
 
 static struct mtx pf_sendqueue_mtx;
 MTX_SYSINIT(pf_sendqueue_mtx, &pf_sendqueue_mtx, "pf send queue", MTX_DEF);
 #define	PF_SENDQ_LOCK()		mtx_lock(&pf_sendqueue_mtx)
 #define	PF_SENDQ_UNLOCK()	mtx_unlock(&pf_sendqueue_mtx)
 
 /*
  * Queue for pf_overload_task() tasks.
  */
 /*
  * One pending overload request, queued by pf_src_connlimit() and
  * consumed by pf_overload_task().
  */
 struct pf_overload_entry {
 	SLIST_ENTRY(pf_overload_entry)	next;	/* singly-linked list linkage */
 	struct pf_addr  		addr;	/* copied from the state's source node address */
 	sa_family_t			af;	/* address family of the state's wire key */
 	uint8_t				dir;	/* direction of the offending state */
 	struct pf_rule  		*rule;	/* rule of the offending state */
 };
 
 SLIST_HEAD(pf_overload_head, pf_overload_entry);
 static VNET_DEFINE(struct pf_overload_head, pf_overloadqueue);
 #define V_pf_overloadqueue	VNET(pf_overloadqueue)
 static VNET_DEFINE(struct task, pf_overloadtask);
 #define	V_pf_overloadtask	VNET(pf_overloadtask)
 
 static struct mtx pf_overloadqueue_mtx;
 MTX_SYSINIT(pf_overloadqueue_mtx, &pf_overloadqueue_mtx,
     "pf overload/flush queue", MTX_DEF);
 #define	PF_OVERLOADQ_LOCK()	mtx_lock(&pf_overloadqueue_mtx)
 #define	PF_OVERLOADQ_UNLOCK()	mtx_unlock(&pf_overloadqueue_mtx)
 
 VNET_DEFINE(struct pf_rulequeue, pf_unlinked_rules);
 struct mtx pf_unlnkdrules_mtx;
 MTX_SYSINIT(pf_unlnkdrules_mtx, &pf_unlnkdrules_mtx, "pf unlinked rules",
     MTX_DEF);
 
 static VNET_DEFINE(uma_zone_t,	pf_sources_z);
 #define	V_pf_sources_z	VNET(pf_sources_z)
 uma_zone_t		pf_mtag_z;
 VNET_DEFINE(uma_zone_t,	 pf_state_z);
 VNET_DEFINE(uma_zone_t,	 pf_state_key_z);
 
 /*
  * pf_stateid holds one 64-bit slot per possible CPU (MAXCPU entries).
  * PFID_CPUMASK selects the topmost PFID_CPUBITS bits of a 64-bit value
  * (assuming NBBY is the number of bits per byte), and PFID_MAXID is its
  * complement, i.e. every bit below the CPU field.  The CTASSERT is a
  * compile-time check that PFID_CPUBITS bits can encode MAXCPU values.
  */
 VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]);
 #define	PFID_CPUBITS	8
 #define	PFID_CPUSHIFT	(sizeof(uint64_t) * NBBY - PFID_CPUBITS)
 #define	PFID_CPUMASK	((uint64_t)((1 << PFID_CPUBITS) - 1) <<	PFID_CPUSHIFT)
 #define	PFID_MAXID	(~PFID_CPUMASK)
 CTASSERT((1 << PFID_CPUBITS) >= MAXCPU);
 
 static void		 pf_src_tree_remove_state(struct pf_state *);
 static void		 pf_init_threshold(struct pf_threshold *, u_int32_t,
 			    u_int32_t);
 static void		 pf_add_threshold(struct pf_threshold *);
 static int		 pf_check_threshold(struct pf_threshold *);
 
 static void		 pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *,
 			    u_int16_t *, u_int16_t *, struct pf_addr *,
 			    u_int16_t, u_int8_t, sa_family_t);
 static int		 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
 			    struct tcphdr *, struct pf_state_peer *);
 static void		 pf_change_icmp(struct pf_addr *, u_int16_t *,
 			    struct pf_addr *, struct pf_addr *, u_int16_t,
 			    u_int16_t *, u_int16_t *, u_int16_t *,
 			    u_int16_t *, u_int8_t, sa_family_t);
 static void		 pf_send_tcp(struct mbuf *,
 			    const struct pf_rule *, sa_family_t,
 			    const struct pf_addr *, const struct pf_addr *,
 			    u_int16_t, u_int16_t, u_int32_t, u_int32_t,
 			    u_int8_t, u_int16_t, u_int16_t, u_int8_t, int,
 			    u_int16_t, struct ifnet *);
 static void		 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
 			    sa_family_t, struct pf_rule *);
 static void		 pf_detach_state(struct pf_state *);
 static int		 pf_state_key_attach(struct pf_state_key *,
 			    struct pf_state_key *, struct pf_state *);
 static void		 pf_state_key_detach(struct pf_state *, int);
 static int		 pf_state_key_ctor(void *, int, void *, int);
 static u_int32_t	 pf_tcp_iss(struct pf_pdesc *);
 static int		 pf_test_rule(struct pf_rule **, struct pf_state **,
 			    int, struct pfi_kif *, struct mbuf *, int,
 			    struct pf_pdesc *, struct pf_rule **,
 			    struct pf_ruleset **, struct inpcb *);
 static int		 pf_create_state(struct pf_rule *, struct pf_rule *,
 			    struct pf_rule *, struct pf_pdesc *,
 			    struct pf_src_node *, struct pf_state_key *,
 			    struct pf_state_key *, struct mbuf *, int,
 			    u_int16_t, u_int16_t, int *, struct pfi_kif *,
 			    struct pf_state **, int, u_int16_t, u_int16_t,
 			    int);
 static int		 pf_test_fragment(struct pf_rule **, int,
 			    struct pfi_kif *, struct mbuf *, void *,
 			    struct pf_pdesc *, struct pf_rule **,
 			    struct pf_ruleset **);
 static int		 pf_tcp_track_full(struct pf_state_peer *,
 			    struct pf_state_peer *, struct pf_state **,
 			    struct pfi_kif *, struct mbuf *, int,
 			    struct pf_pdesc *, u_short *, int *);
 static int		 pf_tcp_track_sloppy(struct pf_state_peer *,
 			    struct pf_state_peer *, struct pf_state **,
 			    struct pf_pdesc *, u_short *);
 static int		 pf_test_state_tcp(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, u_short *);
 static int		 pf_test_state_udp(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *);
 static int		 pf_test_state_icmp(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, u_short *);
 static int		 pf_test_state_other(struct pf_state **, int,
 			    struct pfi_kif *, struct mbuf *, struct pf_pdesc *);
 static u_int8_t		 pf_get_wscale(struct mbuf *, int, u_int16_t,
 			    sa_family_t);
 static u_int16_t	 pf_get_mss(struct mbuf *, int, u_int16_t,
 			    sa_family_t);
 static u_int16_t	 pf_calc_mss(struct pf_addr *, sa_family_t,
 				int, u_int16_t);
 static int		 pf_check_proto_cksum(struct mbuf *, int, int,
 			    u_int8_t, sa_family_t);
 static void		 pf_print_state_parts(struct pf_state *,
 			    struct pf_state_key *, struct pf_state_key *);
 static int		 pf_addr_wrap_neq(struct pf_addr_wrap *,
 			    struct pf_addr_wrap *);
 static struct pf_state	*pf_find_state(struct pfi_kif *,
 			    struct pf_state_key_cmp *, u_int);
 static int		 pf_src_connlimit(struct pf_state **);
 static void		 pf_overload_task(void *v, int pending);
 static int		 pf_insert_src_node(struct pf_src_node **,
 			    struct pf_rule *, struct pf_addr *, sa_family_t);
 static u_int		 pf_purge_expired_states(u_int, int);
 static void		 pf_purge_unlinked_rules(void);
 static int		 pf_mtag_uminit(void *, int, int);
 static void		 pf_mtag_free(struct m_tag *);
 #ifdef INET
 static void		 pf_route(struct mbuf **, struct pf_rule *, int,
 			    struct ifnet *, struct pf_state *,
 			    struct pf_pdesc *);
 #endif /* INET */
 #ifdef INET6
 static void		 pf_change_a6(struct pf_addr *, u_int16_t *,
 			    struct pf_addr *, u_int8_t);
 static void		 pf_route6(struct mbuf **, struct pf_rule *, int,
 			    struct ifnet *, struct pf_state *,
 			    struct pf_pdesc *);
 #endif /* INET6 */
 
 int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len);
 
 VNET_DECLARE(int, pf_end_threads);
 
 VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]);
 
 /*
  * True (1) when the packet descriptor carries a pf mbuf tag whose flags
  * include PF_PACKET_LOOPED; false (0) otherwise, including when no tag
  * is attached.
  */
 #define	PACKET_LOOPED(pd)						\
 	((pd)->pf_mtag != NULL &&					\
 	 ((pd)->pf_mtag->flags & PF_PACKET_LOOPED) != 0)
 
 /*
  * Look up the state for key (k) in direction (d) on interface (i) and
  * store it in (s).
  *
  * WARNING: this macro contains return statements and therefore returns
  * from the *calling* function:
  *   - PF_DROP when no state is found;
  *   - PF_PASS when the packet is marked as looped (PACKET_LOOPED(pd));
  *   - PF_PASS when (d) is PF_OUT, the state's rule is an outbound
  *     route-to or an inbound reply-to rule, and the state has a routing
  *     interface (rt_kif) that differs from (i).
  * Only when none of these apply does control continue after the macro.
  */
 #define	STATE_LOOKUP(i, k, d, s, pd)					\
 	do {								\
 		(s) = pf_find_state((i), (k), (d));			\
 		if ((s) == NULL)					\
 			return (PF_DROP);				\
 		if (PACKET_LOOPED(pd))					\
 			return (PF_PASS);				\
 		if ((d) == PF_OUT &&					\
 		    (((s)->rule.ptr->rt == PF_ROUTETO &&		\
 		    (s)->rule.ptr->direction == PF_OUT) ||		\
 		    ((s)->rule.ptr->rt == PF_REPLYTO &&			\
 		    (s)->rule.ptr->direction == PF_IN)) &&		\
 		    (s)->rt_kif != NULL &&				\
 		    (s)->rt_kif != (i))					\
 			return (PF_PASS);				\
 	} while (0)
 
 /*
  * Yield (k) when rule (r) has PFRULE_IFBOUND set in rule_flag, otherwise
  * V_pfi_all.  The whole conditional is wrapped in parentheses so the
  * macro keeps its meaning when used inside a larger expression (the
  * conditional operator has very low precedence).
  */
 #define	BOUND_IFACE(r, k) \
 	(((r)->rule_flag & PFRULE_IFBOUND) ? (k) : V_pfi_all)
 
 /*
  * Add one to the states_cur and states_tot counters of the state's rule
  * and, when they are non-NULL, of its anchor and nat_rule as well.
  *
  * (s) is parenthesized at every use so any pointer expression may be
  * passed; it is evaluated several times and must have no side effects.
  */
 #define	STATE_INC_COUNTERS(s)						\
 	do {								\
 		counter_u64_add((s)->rule.ptr->states_cur, 1);		\
 		counter_u64_add((s)->rule.ptr->states_tot, 1);		\
 		if ((s)->anchor.ptr != NULL) {				\
 			counter_u64_add((s)->anchor.ptr->states_cur, 1);\
 			counter_u64_add((s)->anchor.ptr->states_tot, 1);\
 		}							\
 		if ((s)->nat_rule.ptr != NULL) {			\
 			counter_u64_add((s)->nat_rule.ptr->states_cur, 1);\
 			counter_u64_add((s)->nat_rule.ptr->states_tot, 1);\
 		}							\
 	} while (0)
 
 /*
  * Subtract one from the states_cur counter of the state's nat_rule and
  * anchor (each only when non-NULL) and of its rule.  The states_tot
  * counters are left untouched.
  *
  * (s) is parenthesized at every use so any pointer expression may be
  * passed; it is evaluated several times and must have no side effects.
  */
 #define	STATE_DEC_COUNTERS(s)						\
 	do {								\
 		if ((s)->nat_rule.ptr != NULL)				\
 			counter_u64_add((s)->nat_rule.ptr->states_cur, -1);\
 		if ((s)->anchor.ptr != NULL)				\
 			counter_u64_add((s)->anchor.ptr->states_cur, -1);\
 		counter_u64_add((s)->rule.ptr->states_cur, -1);		\
 	} while (0)
 
 static MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
 VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
 VNET_DEFINE(struct pf_idhash *, pf_idhash);
 VNET_DEFINE(struct pf_srchash *, pf_srchash);
 
 SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)");
 
 u_long	pf_hashmask;
 u_long	pf_srchashmask;
 static u_long	pf_hashsize;
 static u_long	pf_srchashsize;
 
 SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_RDTUN,
     &pf_hashsize, 0, "Size of pf(4) states hashtable");
 SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_RDTUN,
     &pf_srchashsize, 0, "Size of pf(4) source nodes hashtable");
 
 VNET_DEFINE(void *, pf_swi_cookie);
 
 VNET_DEFINE(uint32_t, pf_hashseed);
 #define	V_pf_hashseed	VNET(pf_hashseed)
 
 int
 pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
 {
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		if (a->addr32[0] > b->addr32[0])
 			return (1);
 		if (a->addr32[0] < b->addr32[0])
 			return (-1);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		if (a->addr32[3] > b->addr32[3])
 			return (1);
 		if (a->addr32[3] < b->addr32[3])
 			return (-1);
 		if (a->addr32[2] > b->addr32[2])
 			return (1);
 		if (a->addr32[2] < b->addr32[2])
 			return (-1);
 		if (a->addr32[1] > b->addr32[1])
 			return (1);
 		if (a->addr32[1] < b->addr32[1])
 			return (-1);
 		if (a->addr32[0] > b->addr32[0])
 			return (1);
 		if (a->addr32[0] < b->addr32[0])
 			return (-1);
 		break;
 #endif /* INET6 */
 	default:
 		panic("%s: unknown address family %u", __func__, af);
 	}
 	return (0);
 }
 
 /*
  * Hash the leading sizeof(struct pf_state_key_cmp) bytes of a state key
  * with murmur3, seeded by V_pf_hashseed, and mask the result with
  * pf_hashmask.
  */
 static __inline uint32_t
 pf_hashkey(struct pf_state_key *sk)
 {
 	const size_t nwords =
 	    sizeof(struct pf_state_key_cmp)/sizeof(uint32_t);
 	uint32_t hash = murmur3_32_hash32((uint32_t *)sk, nwords,
 	    V_pf_hashseed);
 
 	return (hash & pf_hashmask);
 }
 
 /*
  * Hash a source address of family af with murmur3, seeded by
  * V_pf_hashseed, and mask the result with pf_srchashmask.  Only the
  * v4 or v6 member matching af is hashed; other families panic.
  */
 static __inline uint32_t
 pf_hashsrc(struct pf_addr *addr, sa_family_t af)
 {
 	uint32_t *words;
 	size_t nwords;
 
 	switch (af) {
 	case AF_INET:
 		words = (uint32_t *)&addr->v4;
 		nwords = sizeof(addr->v4)/sizeof(uint32_t);
 		break;
 	case AF_INET6:
 		words = (uint32_t *)&addr->v6;
 		nwords = sizeof(addr->v6)/sizeof(uint32_t);
 		break;
 	default:
 		panic("%s: unknown address family %u", __func__, af);
 	}
 
 	return (murmur3_32_hash32(words, nwords, V_pf_hashseed) &
 	    pf_srchashmask);
 }
 
 #ifdef ALTQ
 /*
  * Derive a non-zero hash for a state from its address (scaled by the
  * structure size) XORed with the CRC32 of its src and dst members.
  * A result of 0 is replaced by 1.
  */
 static int
 pf_state_hash(struct pf_state *s)
 {
 	u_int32_t hv;
 
 	hv = (intptr_t)s / sizeof(*s);
 	hv ^= crc32(&s->src, sizeof(s->src));
 	hv ^= crc32(&s->dst, sizeof(s->dst));
 
 	return (hv != 0 ? hv : 1);
 }
 #endif
 
 #ifdef INET6
 /*
  * Copy an address of family af from src to dst: one 32-bit word for
  * AF_INET (when INET is compiled in), four words for AF_INET6.  Any
  * other family leaves dst untouched.
  */
 void
 pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
 {
 	int i, nwords = 0;
 
 #ifdef INET
 	if (af == AF_INET)
 		nwords = 1;
 #endif /* INET */
 	if (af == AF_INET6)
 		nwords = 4;
 
 	for (i = 0; i < nwords; i++)
 		dst->addr32[i] = src->addr32[i];
 }
 #endif /* INET6 */
 
 /*
  * Reset a threshold: store the limit scaled by PF_THRESHOLD_MULT and
  * the window length in seconds, clear the running count and stamp it
  * with the current uptime.
  */
 static void
 pf_init_threshold(struct pf_threshold *threshold,
     u_int32_t limit, u_int32_t seconds)
 {
 	threshold->count = 0;
 	threshold->last = time_uptime;
 	threshold->seconds = seconds;
 	threshold->limit = limit * PF_THRESHOLD_MULT;
 }
 
 /*
  * Record one event against a threshold.  The running count is first
  * aged: cleared when at least a full window has elapsed since the last
  * event, otherwise reduced in proportion to the elapsed fraction of the
  * window.  Then PF_THRESHOLD_MULT is added and the timestamp updated.
  */
 static void
 pf_add_threshold(struct pf_threshold *threshold)
 {
 	u_int32_t now, elapsed;
 
 	now = time_uptime;
 	elapsed = now - threshold->last;
 
 	if (elapsed < threshold->seconds)
 		threshold->count -= threshold->count * elapsed /
 		    threshold->seconds;
 	else
 		threshold->count = 0;
 
 	threshold->count += PF_THRESHOLD_MULT;
 	threshold->last = now;
 }
 
 /* Return non-zero when the running count is above the stored limit. */
 static int
 pf_check_threshold(struct pf_threshold *threshold)
 {
 	return (threshold->limit < threshold->count);
 }
 
 /*
  * Account for a newly established connection on the state's source node
  * and enforce the rule's max-src-conn and max-src-conn-rate limits.
  *
  * Returns 0 when no limit was exceeded.  Otherwise the state is marked
  * for purge, 1 is returned and, if the rule has an overload table, an
  * entry is queued for pf_overload_task() (best effort: the allocation
  * is M_NOWAIT and a failure is silently tolerated).
  */
 static int
 pf_src_connlimit(struct pf_state **state)
 {
 	struct pf_state *s;
 	struct pf_overload_entry *pfoe;
 	int violations;
 
 	PF_STATE_LOCK_ASSERT(*state);
 	s = *state;
 
 	s->src_node->conn++;
 	s->src.tcp_est = 1;
 	pf_add_threshold(&s->src_node->conn_rate);
 
 	violations = 0;
 	if (s->rule.ptr->max_src_conn &&
 	    s->src_node->conn > s->rule.ptr->max_src_conn) {
 		counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONN], 1);
 		violations++;
 	}
 	if (s->rule.ptr->max_src_conn_rate.limit &&
 	    pf_check_threshold(&s->src_node->conn_rate)) {
 		counter_u64_add(V_pf_status.lcounters[LCNT_SRCCONNRATE], 1);
 		violations++;
 	}
 	if (violations == 0)
 		return (0);
 
 	/* A limit was hit: kill this state. */
 	s->timeout = PFTM_PURGE;
 	s->src.state = s->dst.state = TCPS_CLOSED;
 
 	/* Without an overload table there is nothing more to schedule. */
 	if (s->rule.ptr->overload_tbl == NULL)
 		return (1);
 
 	/* Hand the offending address to the overload/flush task. */
 	pfoe = malloc(sizeof(*pfoe), M_PFTEMP, M_NOWAIT);
 	if (pfoe == NULL)
 		return (1);	/* too bad :( */
 
 	bcopy(&s->src_node->addr, &pfoe->addr, sizeof(pfoe->addr));
 	pfoe->af = s->key[PF_SK_WIRE]->af;
 	pfoe->rule = s->rule.ptr;
 	pfoe->dir = s->direction;
 
 	PF_OVERLOADQ_LOCK();
 	SLIST_INSERT_HEAD(&V_pf_overloadqueue, pfoe, next);
 	PF_OVERLOADQ_UNLOCK();
 	taskqueue_enqueue(taskqueue_swi, &V_pf_overloadtask);
 
 	return (1);
 }
 
 /*
  * Taskqueue handler for source addresses that exceeded a connection
  * limit (queued by pf_src_connlimit()).
  *
  * 'v' is the vnet the task was initialised with; 'pending' is unused.
  *
  * The global overload queue is detached under its lock and processed
  * privately:
  *  1. every queued address is inserted into its rule's overload table;
  *  2. entries whose rule does not request flushing are freed;
  *  3. for the remaining entries the whole ID hash is walked and every
  *     state matching the address (and the rule, unless the flush is
  *     global) is marked for purge.
  */
 static void
 pf_overload_task(void *v, int pending)
 {
 	struct pf_overload_head queue;
 	struct pfr_addr p;
 	struct pf_overload_entry *pfoe, *pfoe1;
 	uint32_t killed = 0;
 
 	CURVNET_SET((struct vnet *)v);
 
 	/* Steal the pending entries so producers are not blocked. */
 	PF_OVERLOADQ_LOCK();
 	queue = V_pf_overloadqueue;
 	SLIST_INIT(&V_pf_overloadqueue);
 	PF_OVERLOADQ_UNLOCK();
 
 	bzero(&p, sizeof(p));
 	SLIST_FOREACH(pfoe, &queue, next) {
 		counter_u64_add(V_pf_status.lcounters[LCNT_OVERLOAD_TABLE], 1);
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			printf("%s: blocking address ", __func__);
 			pf_print_host(&pfoe->addr, 0, pfoe->af);
 			printf("\n");
 		}
 
 		p.pfra_af = pfoe->af;
 		switch (pfoe->af) {
 #ifdef INET
 		case AF_INET:
 			p.pfra_net = 32;
 			p.pfra_ip4addr = pfoe->addr.v4;
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			p.pfra_net = 128;
 			p.pfra_ip6addr = pfoe->addr.v6;
 			break;
 #endif
 		}
 
 		PF_RULES_WLOCK();
 		pfr_insert_kentry(pfoe->rule->overload_tbl, &p, time_second);
 		PF_RULES_WUNLOCK();
 	}
 
 	/*
 	 * Remove those entries, that don't need flushing.
 	 */
 	SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
 		if (pfoe->rule->flush == 0) {
 			SLIST_REMOVE(&queue, pfoe, pf_overload_entry, next);
 			free(pfoe, M_PFTEMP);
 		} else
 			counter_u64_add(
 			    V_pf_status.lcounters[LCNT_OVERLOAD_FLUSH], 1);
 
 	/* If nothing to flush, return. */
 	if (SLIST_EMPTY(&queue)) {
 		CURVNET_RESTORE();
 		return;
 	}
 
 	for (int i = 0; i <= pf_hashmask; i++) {
 		struct pf_idhash *ih = &V_pf_idhash[i];
 		struct pf_state_key *sk;
 		struct pf_state *s;
 
 		PF_HASHROW_LOCK(ih);
 		LIST_FOREACH(s, &ih->states, entry) {
 		    sk = s->key[PF_SK_WIRE];
 		    SLIST_FOREACH(pfoe, &queue, next) {
 			if (sk->af == pfoe->af &&
 			    ((pfoe->rule->flush & PF_FLUSH_GLOBAL) ||
 			    pfoe->rule == s->rule.ptr) &&
 			    ((pfoe->dir == PF_OUT &&
 			    PF_AEQ(&pfoe->addr, &sk->addr[1], sk->af)) ||
 			    (pfoe->dir == PF_IN &&
 			    PF_AEQ(&pfoe->addr, &sk->addr[0], sk->af)))) {
 				s->timeout = PFTM_PURGE;
 				s->src.state = s->dst.state = TCPS_CLOSED;
 				killed++;
 				/*
 				 * One match is enough: stop so that a state
 				 * matched by several queue entries is only
 				 * counted once.
 				 */
 				break;
 			}
 		    }
 		}
 		PF_HASHROW_UNLOCK(ih);
 	}
 	SLIST_FOREACH_SAFE(pfoe, &queue, next, pfoe1)
 		free(pfoe, M_PFTEMP);
 	if (V_pf_status.debug >= PF_DEBUG_MISC)
 		printf("%s: %u states killed\n", __func__, killed);
 
 	CURVNET_RESTORE();
 }
 
 /*
  * Look up the source node for (src, rule, af).
  *
  * On a hit the node's state count is incremented and the hash row is
  * released before returning the node.  On a miss NULL is returned; the
  * row is left locked when 'returnlocked' is non-zero so that the caller
  * can consistently allocate and insert a new node.
  */
 struct pf_src_node *
 pf_find_src_node(struct pf_addr *src, struct pf_rule *rule, sa_family_t af,
 	int returnlocked)
 {
 	struct pf_srchash *sh;
 	struct pf_src_node *n;
 
 	counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_SEARCH], 1);
 
 	sh = &V_pf_srchash[pf_hashsrc(src, af)];
 	PF_HASHROW_LOCK(sh);
 	LIST_FOREACH(n, &sh->nodes, entry) {
 		if (n->rule.ptr != rule || n->af != af)
 			continue;
 		if (af == AF_INET && n->addr.v4.s_addr == src->v4.s_addr)
 			break;
 		if (af == AF_INET6 && bcmp(&n->addr, src, sizeof(*src)) == 0)
 			break;
 	}
 
 	if (n != NULL)
 		n->states++;
 	if (n != NULL || returnlocked == 0)
 		PF_HASHROW_UNLOCK(sh);
 
 	return (n);
 }
 
 /*
  * Make sure '*sn' refers to a source node for (src, rule, af).
  *
  * If '*sn' is NULL on entry it is looked up first; when the lookup
  * misses, a new node is allocated, initialised and linked into the
  * source hash row (which pf_find_src_node() left locked for us).
  *
  * Returns 0 on success.  Returns -1 when a new node was needed but the
  * rule's max_src_nodes limit is reached or the allocation fails, or when
  * an existing node already carries max_src_states states.
  */
 static int
 pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
     struct pf_addr *src, sa_family_t af)
 {
 
 	KASSERT((rule->rule_flag & PFRULE_RULESRCTRACK ||
 	    rule->rpool.opts & PF_POOL_STICKYADDR),
 	    ("%s for non-tracking rule %p", __func__, rule));
 
 	/* returnlocked == 1: on a miss the hash row stays locked. */
 	if (*sn == NULL)
 		*sn = pf_find_src_node(src, rule, af, 1);
 
 	if (*sn == NULL) {
 		struct pf_srchash *sh = &V_pf_srchash[pf_hashsrc(src, af)];
 
 		PF_HASHROW_ASSERT(sh);
 
 		/* Allocate only while under the per-rule node limit. */
 		if (!rule->max_src_nodes ||
 		    counter_u64_fetch(rule->src_nodes) < rule->max_src_nodes)
 			(*sn) = uma_zalloc(V_pf_sources_z, M_NOWAIT | M_ZERO);
 		else
 			counter_u64_add(V_pf_status.lcounters[LCNT_SRCNODES],
 			    1);
 		/* Covers both "limit reached" and "allocation failed". */
 		if ((*sn) == NULL) {
 			PF_HASHROW_UNLOCK(sh);
 			return (-1);
 		}
 
 		pf_init_threshold(&(*sn)->conn_rate,
 		    rule->max_src_conn_rate.limit,
 		    rule->max_src_conn_rate.seconds);
 
 		(*sn)->af = af;
 		(*sn)->rule.ptr = rule;
 		PF_ACPY(&(*sn)->addr, src, af);
 		LIST_INSERT_HEAD(&sh->nodes, *sn, entry);
 		(*sn)->creation = time_uptime;
 		(*sn)->ruletype = rule->action;
 		/* The state being created is the node's first user. */
 		(*sn)->states = 1;
 		if ((*sn)->rule.ptr != NULL)
 			counter_u64_add((*sn)->rule.ptr->src_nodes, 1);
 		PF_HASHROW_UNLOCK(sh);
 		counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_INSERT], 1);
 	} else {
 		/* Existing node: enforce the per-source state limit. */
 		if (rule->max_src_states &&
 		    (*sn)->states >= rule->max_src_states) {
 			counter_u64_add(V_pf_status.lcounters[LCNT_SRCSTATES],
 			    1);
 			return (-1);
 		}
 	}
 	return (0);
 }
 
 void
 pf_unlink_src_node(struct pf_src_node *src)
 {
 
 	PF_HASHROW_ASSERT(&V_pf_srchash[pf_hashsrc(&src->addr, src->af)]);
 	LIST_REMOVE(src, entry);
 	if (src->rule.ptr)
 		counter_u64_add(src->rule.ptr->src_nodes, -1);
 }
 
 /*
  * Free every source node on the list 'head', account for the removals
  * in the status counters and return how many nodes were freed.
  */
 u_int
 pf_free_src_nodes(struct pf_src_node_list *head)
 {
 	struct pf_src_node *node, *after;
 	u_int freed;
 
 	freed = 0;
 	LIST_FOREACH_SAFE(node, head, entry, after) {
 		freed++;
 		uma_zfree(V_pf_sources_z, node);
 	}
 
 	counter_u64_add(V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], freed);
 
 	return (freed);
 }
 
 void
 pf_mtag_initialize()
 {
 
 	pf_mtag_z = uma_zcreate("pf mtags", sizeof(struct m_tag) +
 	    sizeof(struct pf_mtag), NULL, NULL, pf_mtag_uminit, NULL,
 	    UMA_ALIGN_PTR, 0);
 }
 
 /*
  * Per-vnet data storage structures initialization.
  *
  * Sets up the UMA zones for states, state keys and source nodes, the
  * key/ID/source hash tables with one mutex per row, the ALTQ lists, the
  * send and overload queues (plus the overload task) and the list of
  * unlinked rules.
  */
 void
 pf_initialize()
 {
 	struct pf_keyhash	*kh;
 	struct pf_idhash	*ih;
 	struct pf_srchash	*sh;
 	u_int i;
 
 	/* Fall back to defaults when a size is unset or not a power of 2. */
 	if (pf_hashsize == 0 || !powerof2(pf_hashsize))
 		pf_hashsize = PF_HASHSIZ;
 	if (pf_srchashsize == 0 || !powerof2(pf_srchashsize))
 		pf_srchashsize = PF_HASHSIZ / 4;
 
 	V_pf_hashseed = arc4random();
 
 	/* States and state keys storage. */
 	V_pf_state_z = uma_zcreate("pf states", sizeof(struct pf_state),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_pf_limits[PF_LIMIT_STATES].zone = V_pf_state_z;
 	uma_zone_set_max(V_pf_state_z, PFSTATE_HIWAT);
 	uma_zone_set_warning(V_pf_state_z, "PF states limit reached");
 
 	V_pf_state_key_z = uma_zcreate("pf state keys",
 	    sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	V_pf_keyhash = malloc(pf_hashsize * sizeof(struct pf_keyhash),
 	    M_PFHASH, M_WAITOK | M_ZERO);
 	V_pf_idhash = malloc(pf_hashsize * sizeof(struct pf_idhash),
 	    M_PFHASH, M_WAITOK | M_ZERO);
 	pf_hashmask = pf_hashsize - 1;
 	/* Key and ID tables share one size, so walk them in lock step. */
 	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask;
 	    i++, kh++, ih++) {
 		mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
 		mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
 	}
 
 	/* Source nodes. */
 	V_pf_sources_z = uma_zcreate("pf source nodes",
 	    sizeof(struct pf_src_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
 	    0);
 	V_pf_limits[PF_LIMIT_SRC_NODES].zone = V_pf_sources_z;
 	uma_zone_set_max(V_pf_sources_z, PFSNODE_HIWAT);
 	uma_zone_set_warning(V_pf_sources_z, "PF source nodes limit reached");
 	V_pf_srchash = malloc(pf_srchashsize * sizeof(struct pf_srchash),
 	  M_PFHASH, M_WAITOK|M_ZERO);
 	pf_srchashmask = pf_srchashsize - 1;
 	for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++)
 		mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
 
 	/* ALTQ */
 	TAILQ_INIT(&V_pf_altqs[0]);
 	TAILQ_INIT(&V_pf_altqs[1]);
 	TAILQ_INIT(&V_pf_pabuf);
 	V_pf_altqs_active = &V_pf_altqs[0];
 	V_pf_altqs_inactive = &V_pf_altqs[1];
 
 	/* Send & overload+flush queues. */
 	STAILQ_INIT(&V_pf_sendqueue);
 	SLIST_INIT(&V_pf_overloadqueue);
 	/* The current vnet is handed to pf_overload_task() as its argument. */
 	TASK_INIT(&V_pf_overloadtask, 0, pf_overload_task, curvnet);
 
 	/* Unlinked, but may be referenced rules. */
 	TAILQ_INIT(&V_pf_unlinked_rules);
 }
 
 /* Destroy the UMA zone created by pf_mtag_initialize(). */
 void
 pf_mtag_cleanup()
 {
 	uma_zdestroy(pf_mtag_z);
 }
 
 /*
  * Tear down what pf_initialize() built: destroy the per-row mutexes and
  * free the key, ID and source hash tables (each row is asserted to be
  * empty), drop whatever is still sitting on the send queue, and destroy
  * the source node, state and state key zones.
  */
 void
 pf_cleanup()
 {
 	struct pf_keyhash	*kh;
 	struct pf_idhash	*ih;
 	struct pf_srchash	*sh;
 	struct pf_send_entry	*pfse, *next;
 	u_int i;
 
 	for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask;
 	    i++, kh++, ih++) {
 		KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
 		    __func__));
 		KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty",
 		    __func__));
 		mtx_destroy(&kh->lock);
 		mtx_destroy(&ih->lock);
 	}
 	free(V_pf_keyhash, M_PFHASH);
 	free(V_pf_idhash, M_PFHASH);
 
 	for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) {
 		KASSERT(LIST_EMPTY(&sh->nodes),
 		    ("%s: source node hash not empty", __func__));
 		mtx_destroy(&sh->lock);
 	}
 	free(V_pf_srchash, M_PFHASH);
 
 	/* Packets queued by pf_send() but never transmitted. */
 	STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
 		m_freem(pfse->pfse_m);
 		free(pfse, M_PFTEMP);
 	}
 
 	uma_zdestroy(V_pf_sources_z);
 	uma_zdestroy(V_pf_state_z);
 	uma_zdestroy(V_pf_state_key_z);
 }
 
 /*
  * Item initialiser passed to uma_zcreate() for the pf mtag zone: fill in
  * the struct m_tag header at the start of the item so that it is
  * identified as a PACKET_TAG_PF tag and released through pf_mtag_free().
  * 'size' and 'how' are unused.  Always returns 0.
  */
 static int
 pf_mtag_uminit(void *mem, int size, int how)
 {
 	struct m_tag *tag = mem;
 
 	tag->m_tag_free = pf_mtag_free;
 	tag->m_tag_len = sizeof(struct pf_mtag);
 	tag->m_tag_id = PACKET_TAG_PF;
 	tag->m_tag_cookie = MTAG_ABI_COMPAT;
 
 	return (0);
 }
 
 /* m_tag free callback: hand the tag back to the pf mtag zone. */
 static void
 pf_mtag_free(struct m_tag *t)
 {
 	uma_zfree(pf_mtag_z, t);
 }
 
 /*
  * Return the pf tag attached to mbuf 'm', attaching a fresh zeroed one
  * if none exists yet.  Returns NULL when a new tag cannot be allocated
  * (M_NOWAIT).  The pf_mtag payload lives right behind the m_tag header.
  */
 struct pf_mtag *
 pf_get_mtag(struct mbuf *m)
 {
 	struct m_tag *mtag;
 
 	mtag = m_tag_find(m, PACKET_TAG_PF, NULL);
 	if (mtag == NULL) {
 		mtag = uma_zalloc(pf_mtag_z, M_NOWAIT);
 		if (mtag == NULL)
 			return (NULL);
 		bzero(mtag + 1, sizeof(struct pf_mtag));
 		m_tag_prepend(m, mtag);
 	}
 
 	return ((struct pf_mtag *)(mtag + 1));
 }
 
 /*
  * Attach state 's' to its wire key 'skw' and stack key 'sks' (which may
  * be the same object).
  *
  * For each key, the key hash row is searched for an equal key.  If one
  * exists the freshly allocated key is freed and the existing one is
  * used, unless it already carries a state with the same kif and
  * direction: a TCP state with both peers at or past FIN_WAIT_2 is
  * scheduled for purge and pushed to the list tail, anything else is a
  * collision.
  *
  * Returns 0 with the state locked (PF_STATE_LOCK) and the key rows
  * unlocked, or EEXIST on collision with all locks dropped.
  *
  * NOTE(review): on a collision found while attaching the wire key, only
  * 'sk' (== skw) is freed here; a distinct 'sks' is left untouched --
  * confirm that callers release it.
  */
 static int
 pf_state_key_attach(struct pf_state_key *skw, struct pf_state_key *sks,
     struct pf_state *s)
 {
 	struct pf_keyhash	*khs, *khw, *kh;
 	struct pf_state_key	*sk, *cur;
 	struct pf_state		*si, *olds = NULL;
 	int idx;
 
 	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
 	KASSERT(s->key[PF_SK_WIRE] == NULL, ("%s: state has key", __func__));
 	KASSERT(s->key[PF_SK_STACK] == NULL, ("%s: state has key", __func__));
 
 	/*
 	 * We need to lock hash slots of both keys. To avoid deadlock
 	 * we always lock the slot with lower address first. Unlock order
 	 * isn't important.
 	 *
 	 * We also need to lock ID hash slot before dropping key
 	 * locks. On success we return with ID hash slot locked.
 	 */
 
 	if (skw == sks) {
 		khs = khw = &V_pf_keyhash[pf_hashkey(skw)];
 		PF_HASHROW_LOCK(khs);
 	} else {
 		khs = &V_pf_keyhash[pf_hashkey(sks)];
 		khw = &V_pf_keyhash[pf_hashkey(skw)];
 		if (khs == khw) {
 			PF_HASHROW_LOCK(khs);
 		} else if (khs < khw) {
 			PF_HASHROW_LOCK(khs);
 			PF_HASHROW_LOCK(khw);
 		} else {
 			PF_HASHROW_LOCK(khw);
 			PF_HASHROW_LOCK(khs);
 		}
 	}
 
 #define	KEYS_UNLOCK()	do {			\
 	if (khs != khw) {			\
 		PF_HASHROW_UNLOCK(khs);		\
 		PF_HASHROW_UNLOCK(khw);		\
 	} else					\
 		PF_HASHROW_UNLOCK(khs);		\
 } while (0)
 
 	/*
 	 * First run: start with wire key.
 	 */
 	sk = skw;
 	kh = khw;
 	idx = PF_SK_WIRE;
 
 keyattach:
 	/* Look for an already hashed key equal to the one being attached. */
 	LIST_FOREACH(cur, &kh->keys, entry)
 		if (bcmp(cur, sk, sizeof(struct pf_state_key_cmp)) == 0)
 			break;
 
 	if (cur != NULL) {
 		/* Key exists. Check for same kif, if none, add to key. */
 		TAILQ_FOREACH(si, &cur->states[idx], key_list[idx]) {
 			struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(si)];
 
 			PF_HASHROW_LOCK(ih);
 			if (si->kif == s->kif &&
 			    si->direction == s->direction) {
 				if (sk->proto == IPPROTO_TCP &&
 				    si->src.state >= TCPS_FIN_WAIT_2 &&
 				    si->dst.state >= TCPS_FIN_WAIT_2) {
 					/*
 					 * New state matches an old >FIN_WAIT_2
 					 * state. We can't drop key hash locks,
 					 * thus we can't unlink it properly.
 					 *
 					 * As a workaround we drop it into
 					 * TCPS_CLOSED state, schedule purge
 					 * ASAP and push it into the very end
 					 * of the slot TAILQ, so that it won't
 					 * conflict with our new state.
 					 */
 					si->src.state = si->dst.state =
 					    TCPS_CLOSED;
 					si->timeout = PFTM_PURGE;
 					olds = si;
 				} else {
 					if (V_pf_status.debug >= PF_DEBUG_MISC) {
 						printf("pf: %s key attach "
 						    "failed on %s: ",
 						    (idx == PF_SK_WIRE) ?
 						    "wire" : "stack",
 						    s->kif->pfik_name);
 						pf_print_state_parts(s,
 						    (idx == PF_SK_WIRE) ?
 						    sk : NULL,
 						    (idx == PF_SK_STACK) ?
 						    sk : NULL);
 						printf(", existing: ");
 						pf_print_state_parts(si,
 						    (idx == PF_SK_WIRE) ?
 						    sk : NULL,
 						    (idx == PF_SK_STACK) ?
 						    sk : NULL);
 						printf("\n");
 					}
 					PF_HASHROW_UNLOCK(ih);
 					KEYS_UNLOCK();
 					uma_zfree(V_pf_state_key_z, sk);
 					/* Undo the wire attach done earlier. */
 					if (idx == PF_SK_STACK)
 						pf_detach_state(s);
 					return (EEXIST); /* collision! */
 				}
 			}
 			PF_HASHROW_UNLOCK(ih);
 		}
 		/* Reuse the hashed key; ours is a duplicate. */
 		uma_zfree(V_pf_state_key_z, sk);
 		s->key[idx] = cur;
 	} else {
 		LIST_INSERT_HEAD(&kh->keys, sk, entry);
 		s->key[idx] = sk;
 	}
 
 stateattach:
 	/* List is sorted, if-bound states before floating. */
 	if (s->kif == V_pfi_all)
 		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], s, key_list[idx]);
 	else
 		TAILQ_INSERT_HEAD(&s->key[idx]->states[idx], s, key_list[idx]);
 
 	/* Move a superseded old state behind the new one. */
 	if (olds) {
 		TAILQ_REMOVE(&s->key[idx]->states[idx], olds, key_list[idx]);
 		TAILQ_INSERT_TAIL(&s->key[idx]->states[idx], olds,
 		    key_list[idx]);
 		olds = NULL;
 	}
 
 	/*
 	 * Attach done. See how should we (or should not?)
 	 * attach a second key.
 	 */
 	if (sks == skw) {
 		/* Shared key: only the stack-side list insert remains. */
 		s->key[PF_SK_STACK] = s->key[PF_SK_WIRE];
 		idx = PF_SK_STACK;
 		sks = NULL;
 		goto stateattach;
 	} else if (sks != NULL) {
 		/*
 		 * Continue attaching with stack key.
 		 */
 		sk = sks;
 		kh = khs;
 		idx = PF_SK_STACK;
 		sks = NULL;
 		goto keyattach;
 	}
 
 	/* Take the state lock before the key rows are released. */
 	PF_STATE_LOCK(s);
 	KEYS_UNLOCK();
 
 	KASSERT(s->key[PF_SK_WIRE] != NULL && s->key[PF_SK_STACK] != NULL,
 	    ("%s failure", __func__));
 
 	return (0);
 #undef	KEYS_UNLOCK
 }
 
 /*
  * Detach state 's' from whichever of its stack and wire keys are still
  * set.  Each key's hash row is locked only around its own detach; when
  * both slots refer to the same key, both detaches happen under the one
  * row lock.
  */
 static void
 pf_detach_state(struct pf_state *s)
 {
 	struct pf_state_key *sks = s->key[PF_SK_STACK];
 	struct pf_keyhash *kh;
 
 	if (sks != NULL) {
 		kh = &V_pf_keyhash[pf_hashkey(sks)];
 		PF_HASHROW_LOCK(kh);
 		/* Re-check under the row lock. */
 		if (s->key[PF_SK_STACK] != NULL)
 			pf_state_key_detach(s, PF_SK_STACK);
 		/*
 		 * If both point to same key, then we are done.
 		 */
 		if (sks == s->key[PF_SK_WIRE]) {
 			pf_state_key_detach(s, PF_SK_WIRE);
 			PF_HASHROW_UNLOCK(kh);
 			return;
 		}
 		PF_HASHROW_UNLOCK(kh);
 	}
 
 	if (s->key[PF_SK_WIRE] != NULL) {
 		kh = &V_pf_keyhash[pf_hashkey(s->key[PF_SK_WIRE])];
 		PF_HASHROW_LOCK(kh);
 		/* Re-check under the row lock. */
 		if (s->key[PF_SK_WIRE] != NULL)
 			pf_state_key_detach(s, PF_SK_WIRE);
 		PF_HASHROW_UNLOCK(kh);
 	}
 }
 
 /*
  * Remove state 's' from the state list 'idx' of its key and clear the
  * state's reference to it.  Once the key has no states left on either
  * list it is unhashed and freed.  The key's hash row must be locked.
  */
 static void
 pf_state_key_detach(struct pf_state *s, int idx)
 {
 	struct pf_state_key *key;
 
 	key = s->key[idx];
 #ifdef INVARIANTS
 	PF_HASHROW_ASSERT(&V_pf_keyhash[pf_hashkey(key)]);
 #endif
 	TAILQ_REMOVE(&key->states[idx], s, key_list[idx]);
 	s->key[idx] = NULL;
 
 	/* Still referenced by some state: keep the key. */
 	if (!TAILQ_EMPTY(&key->states[0]) || !TAILQ_EMPTY(&key->states[1]))
 		return;
 
 	LIST_REMOVE(key, entry);
 	uma_zfree(V_pf_state_key_z, key);
 }
 
 /*
  * Constructor passed to uma_zcreate() for state keys: zero the
  * comparable prefix of the key and initialise both per-direction state
  * lists.  'size', 'arg' and 'flags' are unused.  Always returns 0.
  */
 static int
 pf_state_key_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct pf_state_key *key;
 
 	key = mem;
 	bzero(key, sizeof(struct pf_state_key_cmp));
 	TAILQ_INIT(&key->states[PF_SK_WIRE]);
 	TAILQ_INIT(&key->states[PF_SK_STACK]);
 
 	return (0);
 }
 
 /*
  * Allocate a state key and fill it from the packet descriptor: the
  * source and destination address/port pairs are stored at the indices
  * pd->sidx and pd->didx, together with the protocol and address family.
  * Returns NULL if the allocation (M_NOWAIT) fails.
  */
 struct pf_state_key *
 pf_state_key_setup(struct pf_pdesc *pd, struct pf_addr *saddr,
 	struct pf_addr *daddr, u_int16_t sport, u_int16_t dport)
 {
 	struct pf_state_key *key;
 
 	key = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
 	if (key != NULL) {
 		key->af = pd->af;
 		key->proto = pd->proto;
 		PF_ACPY(&key->addr[pd->sidx], saddr, pd->af);
 		key->port[pd->sidx] = sport;
 		PF_ACPY(&key->addr[pd->didx], daddr, pd->af);
 		key->port[pd->didx] = dport;
 	}
 
 	return (key);
 }
 
 /*
  * Allocate a new state key and copy the comparable prefix
  * (struct pf_state_key_cmp) of 'orig' into it.  Returns NULL if the
  * allocation (M_NOWAIT) fails.
  */
 struct pf_state_key *
 pf_state_key_clone(struct pf_state_key *orig)
 {
 	struct pf_state_key *copy;
 
 	copy = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
 	if (copy != NULL)
 		bcopy(orig, copy, sizeof(struct pf_state_key_cmp));
 
 	return (copy);
 }
 
 /*
  * Insert a new state into the key and ID hashes.
  *
  * 'kif' becomes the state's interface; 'skw' and 'sks' are pristine
  * wire and stack keys (possibly the same object).  A state without an
  * id/creatorid gets one assigned from the per-CPU counter and the local
  * host id.
  *
  * Returns 0 with the state's ID hash row locked, or EEXIST when either
  * the key attach or the ID lookup finds a collision (no lock held).
  */
 int
 pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw,
     struct pf_state_key *sks, struct pf_state *s)
 {
 	struct pf_idhash *ih;
 	struct pf_state *cur;
 	int error;
 
 	KASSERT(TAILQ_EMPTY(&sks->states[0]) && TAILQ_EMPTY(&sks->states[1]),
 	    ("%s: sks not pristine", __func__));
 	KASSERT(TAILQ_EMPTY(&skw->states[0]) && TAILQ_EMPTY(&skw->states[1]),
 	    ("%s: skw not pristine", __func__));
 	KASSERT(s->refs == 0, ("%s: state not pristine", __func__));
 
 	s->kif = kif;
 
 	if (s->id == 0 && s->creatorid == 0) {
 		/* XXX: should be atomic, but probability of collision low */
 		if ((s->id = V_pf_stateid[curcpu]++) == PFID_MAXID)
 			V_pf_stateid[curcpu] = 1;
 		/* Fold the CPU number in, then store in big-endian order. */
 		s->id |= (uint64_t )curcpu << PFID_CPUSHIFT;
 		s->id = htobe64(s->id);
 		s->creatorid = V_pf_status.hostid;
 	}
 
 	/* Returns with ID locked on success. */
 	if ((error = pf_state_key_attach(skw, sks, s)) != 0)
 		return (error);
 
 	ih = &V_pf_idhash[PF_IDHASH(s)];
 	PF_HASHROW_ASSERT(ih);
 	/* Refuse a second state with the same (id, creatorid). */
 	LIST_FOREACH(cur, &ih->states, entry)
 		if (cur->id == s->id && cur->creatorid == s->creatorid)
 			break;
 
 	if (cur != NULL) {
 		PF_HASHROW_UNLOCK(ih);
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: state ID collision: "
 			    "id: %016llx creatorid: %08x\n",
 			    (unsigned long long)be64toh(s->id),
 			    ntohl(s->creatorid));
 		}
 		pf_detach_state(s);
 		return (EEXIST);
 	}
 	LIST_INSERT_HEAD(&ih->states, s, entry);
 	/* One for keys, one for ID hash. */
 	refcount_init(&s->refs, 2);
 
 	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_INSERT], 1);
 	/* Notify pfsync when its hook is installed. */
 	if (pfsync_insert_state_ptr != NULL)
 		pfsync_insert_state_ptr(s);
 
 	/* Returns locked. */
 	return (0);
 }
 
 /*
  * Look a state up by its (id, creatorid) pair.
  *
  * On success the state is returned with its ID hash row still locked;
  * on a miss the row is unlocked and NULL is returned.
  */
 struct pf_state *
 pf_find_state_byid(uint64_t id, uint32_t creatorid)
 {
 	struct pf_idhash *row;
 	struct pf_state *st;
 
 	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
 
 	row = &V_pf_idhash[(be64toh(id) % (pf_hashmask + 1))];
 
 	PF_HASHROW_LOCK(row);
 	LIST_FOREACH(st, &row->states, entry) {
 		if (st->id == id && st->creatorid == creatorid)
 			return (st);	/* row intentionally left locked */
 	}
 	PF_HASHROW_UNLOCK(row);
 
 	return (NULL);
 }
 
 /*
  * Find state by key.
  * Returns with ID hash slot locked on success.
  *
  * 'dir' selects which of the key's state lists is searched: the wire
  * list for PF_IN, the stack list otherwise.  The first state that is
  * floating (bound to V_pfi_all) or bound to 'kif' is taken.  NULL is
  * returned when no key or state matches, or when the matching state's
  * timeout is >= PFTM_MAX (being unlinked or due for immediate expiry).
  */
 static struct pf_state *
 pf_find_state(struct pfi_kif *kif, struct pf_state_key_cmp *key, u_int dir)
 {
 	struct pf_keyhash	*kh;
 	struct pf_state_key	*sk;
 	struct pf_state		*s;
 	int idx;
 
 	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
 
 	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];
 
 	PF_HASHROW_LOCK(kh);
 	LIST_FOREACH(sk, &kh->keys, entry)
 		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
 			break;
 	if (sk == NULL) {
 		PF_HASHROW_UNLOCK(kh);
 		return (NULL);
 	}
 
 	idx = (dir == PF_IN ? PF_SK_WIRE : PF_SK_STACK);
 
 	/* List is sorted, if-bound states before floating ones. */
 	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx])
 		if (s->kif == V_pfi_all || s->kif == kif) {
 			/* Lock the state before letting go of the key row. */
 			PF_STATE_LOCK(s);
 			PF_HASHROW_UNLOCK(kh);
 			if (s->timeout >= PFTM_MAX) {
 				/*
 				 * State is either being processed by
 				 * pf_unlink_state() in an other thread, or
 				 * is scheduled for immediate expiry.
 				 */
 				PF_STATE_UNLOCK(s);
 				return (NULL);
 			}
 			return (s);
 		}
 	PF_HASHROW_UNLOCK(kh);
 
 	return (NULL);
 }
 
 /*
  * Find a state by key without regard to interface.
  *
  * 'dir' picks the list(s) searched: PF_IN the wire list, PF_OUT the
  * stack list, PF_INOUT first the wire and then the stack list; any
  * other value panics.
  *
  * With 'more' == NULL the first state found is returned at once.
  * Otherwise the first state is returned and '*more' is incremented for
  * every further state seen (the caller is expected to have initialised
  * it).  Returns NULL when nothing matches.  The key hash row is always
  * unlocked on return and the returned state is not locked here.
  */
 struct pf_state *
 pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
 {
 	struct pf_keyhash	*kh;
 	struct pf_state_key	*sk;
 	struct pf_state		*s, *ret = NULL;
 	int			 idx, inout = 0;
 
 	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_SEARCH], 1);
 
 	kh = &V_pf_keyhash[pf_hashkey((struct pf_state_key *)key)];
 
 	PF_HASHROW_LOCK(kh);
 	LIST_FOREACH(sk, &kh->keys, entry)
 		if (bcmp(sk, key, sizeof(struct pf_state_key_cmp)) == 0)
 			break;
 	if (sk == NULL) {
 		PF_HASHROW_UNLOCK(kh);
 		return (NULL);
 	}
 	switch (dir) {
 	case PF_IN:
 		idx = PF_SK_WIRE;
 		break;
 	case PF_OUT:
 		idx = PF_SK_STACK;
 		break;
 	case PF_INOUT:
 		idx = PF_SK_WIRE;
 		inout = 1;
 		break;
 	default:
 		panic("%s: dir %u", __func__, dir);
 	}
 second_run:
 	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
 		/* Caller does not want a count: first hit wins. */
 		if (more == NULL) {
 			PF_HASHROW_UNLOCK(kh);
 			return (s);
 		}
 
 		if (ret)
 			(*more)++;
 		else
 			ret = s;
 	}
 	/* PF_INOUT: repeat once more on the stack-side list. */
 	if (inout == 1) {
 		inout = 0;
 		idx = PF_SK_STACK;
 		goto second_run;
 	}
 	PF_HASHROW_UNLOCK(kh);
 
 	return (ret);
 }
 
 /* END state table stuff */
 
 /*
  * Queue a send entry at the tail of the per-vnet send queue and
  * schedule the pf software interrupt; pf_intr() is the routine that
  * drains this queue.
  */
 static void
 pf_send(struct pf_send_entry *pfse)
 {
 	PF_SENDQ_LOCK();
 	STAILQ_INSERT_TAIL(&V_pf_sendqueue, pfse, pfse_next);
 	PF_SENDQ_UNLOCK();
 
 	swi_sched(V_pf_swi_cookie, 0);
 }
 
 /*
  * Drain the send queue filled by pf_send().
  *
  * 'v' is the vnet to run in.  The queue is detached under its lock and
  * then processed without it: each entry's mbuf is handed to the IPv4 or
  * IPv6 output routine or turned into an ICMP/ICMPv6 error according to
  * its type, after which the entry is freed.  An entry type that is not
  * compiled in (or unknown) panics.
  */
 void
 pf_intr(void *v)
 {
 	struct pf_send_head queue;
 	struct pf_send_entry *pfse, *next;
 
 	CURVNET_SET((struct vnet *)v);
 
 	/* Take over the whole queue so senders are not held up. */
 	PF_SENDQ_LOCK();
 	queue = V_pf_sendqueue;
 	STAILQ_INIT(&V_pf_sendqueue);
 	PF_SENDQ_UNLOCK();
 
 	STAILQ_FOREACH_SAFE(pfse, &queue, pfse_next, next) {
 		switch (pfse->pfse_type) {
 #ifdef INET
 		case PFSE_IP:
 			ip_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL);
 			break;
 		case PFSE_ICMP:
 			icmp_error(pfse->pfse_m, pfse->icmpopts.type,
 			    pfse->icmpopts.code, 0, pfse->icmpopts.mtu);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case PFSE_IP6:
 			ip6_output(pfse->pfse_m, NULL, NULL, 0, NULL, NULL,
 			    NULL);
 			break;
 		case PFSE_ICMP6:
 			icmp6_error(pfse->pfse_m, pfse->icmpopts.type,
 			    pfse->icmpopts.code, pfse->icmpopts.mtu);
 			break;
 #endif /* INET6 */
 		default:
 			panic("%s: unknown type", __func__);
 		}
 		free(pfse, M_PFTEMP);
 	}
 	CURVNET_RESTORE();
 }
 
 /*
  * Body of the pf purge kernel process; 'v' is the vnet to run in.
  *
  * Each iteration sleeps on the rules lock with a timeout of hz / 10
  * ticks and then scans a slice of the state table for expired states.
  * Whenever the scan wraps around to row 0, expired fragments and source
  * nodes, unlinked rules and kifs are purged as well.
  *
  * When V_pf_end_threads is set, everything is purged in one go,
  * V_pf_end_threads is incremented to announce completion, sleepers on
  * pf_purge_thread are woken and the process exits.
  */
 void
 pf_purge_thread(void *v)
 {
 	u_int idx = 0;
 
 	CURVNET_SET((struct vnet *)v);
 
 	for (;;) {
 		PF_RULES_RLOCK();
 		rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10);
 
 		if (V_pf_end_threads) {
 			/*
 			 * To cleanse up all kifs and rules we need
 			 * two runs: first one clears reference flags,
 			 * then pf_purge_expired_states() doesn't
 			 * raise them, and then second run frees.
 			 */
 			PF_RULES_RUNLOCK();
 			pf_purge_unlinked_rules();
 			pfi_kif_purge();
 
 			/*
 			 * Now purge everything.
 			 */
 			pf_purge_expired_states(0, pf_hashmask);
 			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes();
 
 			/*
 			 * Now all kifs & rules should be unreferenced,
 			 * thus should be successfully freed.
 			 */
 			pf_purge_unlinked_rules();
 			pfi_kif_purge();
 
 			/*
 			 * Announce success and exit.
 			 */
 			PF_RULES_RLOCK();
 			V_pf_end_threads++;
 			PF_RULES_RUNLOCK();
 			wakeup(pf_purge_thread);
 			kproc_exit(0);
 		}
 		PF_RULES_RUNLOCK();
 
 		/* Process 1/interval fraction of the state table every run. */
 		/*
 		 * NOTE(review): the divisor is zero if the PFTM_INTERVAL
 		 * timeout is 0 -- confirm that it is validated where it is
 		 * set.
 		 */
 		idx = pf_purge_expired_states(idx, pf_hashmask /
 			    (V_pf_default_rule.timeout[PFTM_INTERVAL] * 10));
 
 		/* Purge other expired types every PFTM_INTERVAL seconds. */
 		if (idx == 0) {
 			/*
 			 * Order is important:
 			 * - states and src nodes reference rules
 			 * - states and rules reference kifs
 			 */
 			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes();
 			pf_purge_unlinked_rules();
 			pfi_kif_purge();
 		}
 	}
 	/* not reached */
 	CURVNET_RESTORE();
 }
 
 /*
  * Compute the uptime at which 'state' expires.
  *
  * A state marked PFTM_PURGE expires now.  Otherwise the timeout comes
  * from the state's rule, falling back to the default rule when the
  * rule's value is 0.  If adaptive timeouts apply (taken from the rule
  * when it sets an adaptive start, else from the defaults) and the state
  * count lies above the start threshold, the timeout is scaled down
  * linearly, reaching "expire now" at the end threshold.
  */
 u_int32_t
 pf_state_expires(const struct pf_state *state)
 {
 	u_int32_t	lifetime;
 	u_int32_t	lo;
 	u_int32_t	hi;
 	u_int32_t	nstates;
 
 	/* handle all PFTM_* > PFTM_MAX here */
 	if (state->timeout == PFTM_PURGE)
 		return (time_uptime);
 	KASSERT(state->timeout != PFTM_UNLINKED,
 	    ("pf_state_expires: timeout == PFTM_UNLINKED"));
 	KASSERT((state->timeout < PFTM_MAX),
 	    ("pf_state_expires: timeout > PFTM_MAX"));
 
 	lifetime = state->rule.ptr->timeout[state->timeout];
 	if (lifetime == 0)
 		lifetime = V_pf_default_rule.timeout[state->timeout];
 
 	lo = state->rule.ptr->timeout[PFTM_ADAPTIVE_START];
 	if (lo != 0) {
 		hi = state->rule.ptr->timeout[PFTM_ADAPTIVE_END];
 		nstates = counter_u64_fetch(state->rule.ptr->states_cur);
 	} else {
 		lo = V_pf_default_rule.timeout[PFTM_ADAPTIVE_START];
 		hi = V_pf_default_rule.timeout[PFTM_ADAPTIVE_END];
 		nstates = V_pf_status.states;
 	}
 
 	/* Adaptive scaling not applicable: plain timeout. */
 	if (hi == 0 || nstates <= lo || lo >= hi)
 		return (state->expire + lifetime);
 
 	/* At or beyond the end threshold: expire immediately. */
 	if (nstates >= hi)
 		return (time_uptime);
 
 	return (state->expire + lifetime * (hi - nstates) / (hi - lo));
 }
 
 void
 pf_purge_expired_src_nodes()
 {
 	struct pf_src_node_list	 freelist;
 	struct pf_srchash	*sh;
 	struct pf_src_node	*cur, *next;
 	int i;
 
 	LIST_INIT(&freelist);
 	for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) {
 	    PF_HASHROW_LOCK(sh);
 	    LIST_FOREACH_SAFE(cur, &sh->nodes, entry, next)
 		if (cur->states == 0 && cur->expire <= time_uptime) {
 			pf_unlink_src_node(cur);
 			LIST_INSERT_HEAD(&freelist, cur, entry);
 		} else if (cur->rule.ptr != NULL)
 			cur->rule.ptr->rule_flag |= PFRULE_REFS;
 	    PF_HASHROW_UNLOCK(sh);
 	}
 
 	pf_free_src_nodes(&freelist);
 
 	V_pf_status.src_nodes = uma_zone_get_cur(V_pf_sources_z);
 }
 
 /*
  * Drop the references state 's' holds on its source node and on its NAT
  * source node (when that is a different node).  A node whose state
  * count reaches zero gets its expiry set to now plus the source-node
  * timeout of the state's rule (or the default when the rule has none).
  * Both node pointers in the state are cleared afterwards.
  */
 static void
 pf_src_tree_remove_state(struct pf_state *s)
 {
 	struct pf_src_node *sn;
 	struct pf_srchash *sh;
 	uint32_t timeout;
 
 	timeout = s->rule.ptr->timeout[PFTM_SRC_NODE];
 	if (timeout == 0)
 		timeout = V_pf_default_rule.timeout[PFTM_SRC_NODE];
 
 	sn = s->src_node;
 	if (sn != NULL) {
 		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
 		PF_HASHROW_LOCK(sh);
 		/* Undo the accounting done once the connection established. */
 		if (s->src.tcp_est)
 			--sn->conn;
 		if (--sn->states == 0)
 			sn->expire = time_uptime + timeout;
 		PF_HASHROW_UNLOCK(sh);
 	}
 
 	sn = s->nat_src_node;
 	if (sn != NULL && sn != s->src_node) {
 		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
 		PF_HASHROW_LOCK(sh);
 		if (--sn->states == 0)
 			sn->expire = time_uptime + timeout;
 		PF_HASHROW_UNLOCK(sh);
 	}
 
 	s->src_node = s->nat_src_node = NULL;
 }
 
 /*
  * Unlink and potentially free a state. Function may be
  * called with ID hash row locked, but always returns
  * unlocked, since it needs to go through key hash locking.
  *
  * 'flags' may contain PF_ENTER_LOCKED when the caller already holds the
  * state's ID hash row lock; otherwise the lock is taken here.
  *
  * Returns 0 when another thread is already unlinking the state;
  * otherwise returns whatever pf_release_state() returns.
  */
 int
 pf_unlink_state(struct pf_state *s, u_int flags)
 {
 	struct pf_idhash *ih = &V_pf_idhash[PF_IDHASH(s)];
 
 	if ((flags & PF_ENTER_LOCKED) == 0)
 		PF_HASHROW_LOCK(ih);
 	else
 		PF_HASHROW_ASSERT(ih);
 
 	if (s->timeout == PFTM_UNLINKED) {
 		/*
 		 * State is being processed
 		 * by pf_unlink_state() in
 		 * an other thread.
 		 */
 		PF_HASHROW_UNLOCK(ih);
 		return (0);	/* XXXGL: undefined actually */
 	}
 
 	/* A state stuck in PF_TCPS_PROXY_DST gets a RST|ACK sent for it. */
 	if (s->src.state == PF_TCPS_PROXY_DST) {
 		/* XXX wire key the right one? */
 		pf_send_tcp(NULL, s->rule.ptr, s->key[PF_SK_WIRE]->af,
 		    &s->key[PF_SK_WIRE]->addr[1],
 		    &s->key[PF_SK_WIRE]->addr[0],
 		    s->key[PF_SK_WIRE]->port[1],
 		    s->key[PF_SK_WIRE]->port[0],
 		    s->src.seqhi, s->src.seqlo + 1,
 		    TH_RST|TH_ACK, 0, 0, 0, 1, s->tag, NULL);
 	}
 
 	/* Off the ID hash and off the source nodes. */
 	LIST_REMOVE(s, entry);
 	pf_src_tree_remove_state(s);
 
 	if (pfsync_delete_state_ptr != NULL)
 		pfsync_delete_state_ptr(s);
 
 	STATE_DEC_COUNTERS(s);
 
 	/* From here on other threads see the state as unlinked. */
 	s->timeout = PFTM_UNLINKED;
 
 	PF_HASHROW_UNLOCK(ih);
 
 	/* Key hash locking happens inside pf_detach_state(). */
 	pf_detach_state(s);
 	/*
 	 * Two references are dropped: one here and one inside
 	 * pf_release_state() (presumably the "keys" and "ID hash"
 	 * references taken at insert time).
 	 */
 	refcount_release(&s->refs);
 
 	return (pf_release_state(s));
 }
 
 /*
  * Release the memory of a state that is already unlinked and has no
  * references left: run the TCP normalisation cleanup, return the state
  * to its zone and count the removal.
  */
 void
 pf_free_state(struct pf_state *cur)
 {
 	KASSERT(cur->refs == 0, ("%s: %p has refs", __func__, cur));
 	KASSERT(cur->timeout == PFTM_UNLINKED, ("%s: timeout %u", __func__,
 	    cur->timeout));
 
 	pf_normalize_tcp_cleanup(cur);
 	uma_zfree(V_pf_state_z, cur);
 
 	counter_u64_add(V_pf_status.fcounters[FCNT_STATE_REMOVALS], 1);
 }
 
 /*
  * Called only from pf_purge_thread(), thus serialized.
  *
  * Scan up to 'maxcheck' ID hash rows starting at row 'i'.  Expired
  * states are unlinked; every surviving state marks the rules and
  * interfaces it uses as referenced so that the rule and kif purges
  * leave them alone.
  *
  * Returns the row at which the next call should continue, or 0 once
  * the end of the table has been passed.
  */
 static u_int
 pf_purge_expired_states(u_int i, int maxcheck)
 {
 	struct pf_idhash *ih;
 	struct pf_state *s;
 
 	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
 
 	/*
 	 * Go through hash and unlink states that expire now.
 	 */
 	while (maxcheck > 0) {
 
 		ih = &V_pf_idhash[i];
 relock:
 		PF_HASHROW_LOCK(ih);
 		LIST_FOREACH(s, &ih->states, entry) {
 			if (pf_state_expires(s) <= time_uptime) {
 				/*
 				 * pf_unlink_state() returns with the row
 				 * unlocked, so the row is re-locked and
 				 * walked again from its head.
 				 */
 				V_pf_status.states -=
 				    pf_unlink_state(s, PF_ENTER_LOCKED);
 				goto relock;
 			}
 			s->rule.ptr->rule_flag |= PFRULE_REFS;
 			if (s->nat_rule.ptr != NULL)
 				s->nat_rule.ptr->rule_flag |= PFRULE_REFS;
 			if (s->anchor.ptr != NULL)
 				s->anchor.ptr->rule_flag |= PFRULE_REFS;
 			s->kif->pfik_flags |= PFI_IFLAG_REFS;
 			if (s->rt_kif)
 				s->rt_kif->pfik_flags |= PFI_IFLAG_REFS;
 		}
 		PF_HASHROW_UNLOCK(ih);
 
 		/* Return when we hit end of hash. */
 		if (++i > pf_hashmask) {
 			V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
 			return (0);
 		}
 
 		maxcheck--;
 	}
 
 	V_pf_status.states = uma_zone_get_cur(V_pf_state_z);
 
 	return (i);
 }
 
 /*
  * Free unlinked rules that nothing refers to any more.
  *
  * Rules on the unlinked list whose PFRULE_REFS flag is clear are moved
  * to a temporary queue and freed under the rules write lock; rules with
  * the flag set merely have it cleared, so they are freed by a later run
  * unless the flag is raised again in between.  Nothing is done while
  * overload entries are pending.
  */
 static void
 pf_purge_unlinked_rules()
 {
 	struct pf_rulequeue tmpq;
 	struct pf_rule *r, *r1;
 
 	/*
 	 * If we have overloading task pending, then we'd
 	 * better skip purging this time. There is a tiny
 	 * probability that overloading task references
 	 * an already unlinked rule.
 	 */
 	PF_OVERLOADQ_LOCK();
 	if (!SLIST_EMPTY(&V_pf_overloadqueue)) {
 		PF_OVERLOADQ_UNLOCK();
 		return;
 	}
 	PF_OVERLOADQ_UNLOCK();
 
 	/*
 	 * Do naive mark-and-sweep garbage collecting of old rules.
 	 * Reference flag is raised by pf_purge_expired_states()
 	 * and pf_purge_expired_src_nodes().
 	 *
 	 * To avoid LOR between PF_UNLNKDRULES_LOCK/PF_RULES_WLOCK,
 	 * use a temporary queue.
 	 */
 	TAILQ_INIT(&tmpq);
 	PF_UNLNKDRULES_LOCK();
 	TAILQ_FOREACH_SAFE(r, &V_pf_unlinked_rules, entries, r1) {
 		if (!(r->rule_flag & PFRULE_REFS)) {
 			TAILQ_REMOVE(&V_pf_unlinked_rules, r, entries);
 			TAILQ_INSERT_TAIL(&tmpq, r, entries);
 		} else
 			r->rule_flag &= ~PFRULE_REFS;
 	}
 	PF_UNLNKDRULES_UNLOCK();
 
 	/* Sweep: free the collected rules under the rules write lock. */
 	if (!TAILQ_EMPTY(&tmpq)) {
 		PF_RULES_WLOCK();
 		TAILQ_FOREACH_SAFE(r, &tmpq, entries, r1) {
 			TAILQ_REMOVE(&tmpq, r, entries);
 			pf_free_rule(r);
 		}
 		PF_RULES_WUNLOCK();
 	}
 }
 
 /*
  * Print an address, and a port when 'p' is non-zero, with printf().
  *
  * 'addr' and 'p' are converted with ntohl()/ntohs() before printing,
  * i.e. they are expected in network byte order.  IPv4 is printed as a
  * dotted quad followed by ":port".  IPv6 is printed as hexadecimal
  * 16-bit groups with the longest run of two or more zero groups
  * collapsed to "::", followed by "[port]".  Other address families
  * print nothing.
  */
 void
 pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
 {
 	switch (af) {
 #ifdef INET
 	case AF_INET: {
 		u_int32_t a = ntohl(addr->addr32[0]);
 		printf("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
 		    (a>>8)&255, a&255);
 		if (p) {
 			p = ntohs(p);
 			printf(":%u", p);
 		}
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		u_int16_t b;
 		u_int8_t i, curstart, curend, maxstart, maxend;
 		/* 255 serves as the "no run" marker for all four indices. */
 		curstart = curend = maxstart = maxend = 255;
 		/* Find the longest run of all-zero groups. */
 		for (i = 0; i < 8; i++) {
 			if (!addr->addr16[i]) {
 				if (curstart == 255)
 					curstart = i;
 				curend = i;
 			} else {
 				if ((curend - curstart) >
 				    (maxend - maxstart)) {
 					maxstart = curstart;
 					maxend = curend;
 				}
 				curstart = curend = 255;
 			}
 		}
 		/* Account for a run that extends to the last group. */
 		if ((curend - curstart) >
 		    (maxend - maxstart)) {
 			maxstart = curstart;
 			maxend = curend;
 		}
 		for (i = 0; i < 8; i++) {
 			if (i >= maxstart && i <= maxend) {
 				/*
 				 * Inside the collapsed run: emit only the
 				 * colons that make up "::".
 				 */
 				if (i == 0)
 					printf(":");
 				if (i == maxend)
 					printf(":");
 			} else {
 				b = ntohs(addr->addr16[i]);
 				printf("%x", b);
 				if (i < 7)
 					printf(":");
 			}
 		}
 		if (p) {
 			p = ntohs(p);
 			printf("[%u]", p);
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 }
 
 /* Print state 's' using the keys that are attached to it. */
 void
 pf_print_state(struct pf_state *s)
 {
 
 	pf_print_state_parts(s, NULL, NULL);
 }
 
 /*
  * Print a one-line description of a state with printf().
  *
  * 's' may be NULL, as may 'skwp' and 'sksp'.  An explicitly passed wire
  * or stack key takes precedence over the corresponding key attached to
  * 's'.  The output consists of the protocol, the direction, the
  * wire-side and stack-side address pairs (the stack side is printed as
  * "-" when it is the same key as the wire side) and, when 's' is given,
  * the TCP sequence tracking details and the peer state numbers.
  */
 static void
 pf_print_state_parts(struct pf_state *s,
     struct pf_state_key *skwp, struct pf_state_key *sksp)
 {
 	struct pf_state_key *skw, *sks;
 	u_int8_t proto, dir;
 
 	/* Do our best to fill these, but they're skipped if NULL */
 	skw = skwp ? skwp : (s ? s->key[PF_SK_WIRE] : NULL);
 	sks = sksp ? sksp : (s ? s->key[PF_SK_STACK] : NULL);
 	proto = skw ? skw->proto : (sks ? sks->proto : 0);
 	dir = s ? s->direction : 0;
 
 	switch (proto) {
 	case IPPROTO_IPV4:
 		printf("IPv4");
 		break;
 	case IPPROTO_IPV6:
 		printf("IPv6");
 		break;
 	case IPPROTO_TCP:
 		printf("TCP");
 		break;
 	case IPPROTO_UDP:
 		printf("UDP");
 		break;
 	case IPPROTO_ICMP:
 		printf("ICMP");
 		break;
 	case IPPROTO_ICMPV6:
 		printf("ICMPv6");
 		break;
 	default:
 		/*
 		 * Use the value computed above: skw may be NULL here (only
 		 * a stack key, or no key at all, was available).
 		 */
 		printf("%u", proto);
 		break;
 	}
 	switch (dir) {
 	case PF_IN:
 		printf(" in");
 		break;
 	case PF_OUT:
 		printf(" out");
 		break;
 	}
 	if (skw) {
 		printf(" wire: ");
 		pf_print_host(&skw->addr[0], skw->port[0], skw->af);
 		printf(" ");
 		pf_print_host(&skw->addr[1], skw->port[1], skw->af);
 	}
 	if (sks) {
 		printf(" stack: ");
 		if (sks != skw) {
 			pf_print_host(&sks->addr[0], sks->port[0], sks->af);
 			printf(" ");
 			pf_print_host(&sks->addr[1], sks->port[1], sks->af);
 		} else
 			printf("-");
 	}
 	if (s) {
 		if (proto == IPPROTO_TCP) {
 			printf(" [lo=%u high=%u win=%u modulator=%u",
 			    s->src.seqlo, s->src.seqhi,
 			    s->src.max_win, s->src.seqdiff);
 			/* Window scale is shown only if both peers set it. */
 			if (s->src.wscale && s->dst.wscale)
 				printf(" wscale=%u",
 				    s->src.wscale & PF_WSCALE_MASK);
 			printf("]");
 			printf(" [lo=%u high=%u win=%u modulator=%u",
 			    s->dst.seqlo, s->dst.seqhi,
 			    s->dst.max_win, s->dst.seqdiff);
 			if (s->src.wscale && s->dst.wscale)
 				printf(" wscale=%u",
 				    s->dst.wscale & PF_WSCALE_MASK);
 			printf("]");
 		}
 		printf(" %u:%u", s->src.state, s->dst.state);
 	}
 }
 
 /*
  * Print the TCP flags set in f as a leading space followed by one letter
  * per flag, in the fixed order F S R P A U E W.  Nothing is printed when
  * no bit is set.
  */
 void
 pf_print_flags(u_int8_t f)
 {
 	/* With no bits set none of the tests below could fire. */
 	if (f == 0)
 		return;
 
 	printf(" ");
 	if ((f & TH_FIN) != 0)
 		printf("F");
 	if ((f & TH_SYN) != 0)
 		printf("S");
 	if ((f & TH_RST) != 0)
 		printf("R");
 	if ((f & TH_PUSH) != 0)
 		printf("P");
 	if ((f & TH_ACK) != 0)
 		printf("A");
 	if ((f & TH_URG) != 0)
 		printf("U");
 	if ((f & TH_ECE) != 0)
 		printf("E");
 	if ((f & TH_CWR) != 0)
 		printf("W");
 }
 
 /*
  * Helper for pf_calc_skip_steps(): point skip[i] of every rule from
  * head[i] up to, but not including, cur at cur, and leave head[i] == cur.
  * Relies on the caller's locals "head" and "cur".
  */
 #define	PF_SET_SKIP_STEPS(i)					\
 	do {							\
 		while (head[i] != cur) {			\
 			head[i]->skip[i].ptr = cur;		\
 			head[i] = TAILQ_NEXT(head[i], entries);	\
 		}						\
 	} while (0)
 
 /*
  * Fill in the skip[] pointers of every rule in the queue.
  *
  * For each skip criterion (interface, direction, address family, protocol,
  * source/destination address and port) consecutive rules that carry the
  * same value form a run.  Every rule in a run gets its skip pointer for
  * that criterion set to the first rule after the run, or to NULL when the
  * run extends to the end of the queue.
  */
 void
 pf_calc_skip_steps(struct pf_rulequeue *rules)
 {
 	struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
 	int i;
 
 	cur = TAILQ_FIRST(rules);
 	prev = cur;
 	/* head[i] is the first rule of the currently open run for i. */
 	for (i = 0; i < PF_SKIP_COUNT; ++i)
 		head[i] = cur;
 	while (cur != NULL) {
 
 		/* A change relative to the previous rule closes the run. */
 		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
 			PF_SET_SKIP_STEPS(PF_SKIP_IFP);
 		if (cur->direction != prev->direction)
 			PF_SET_SKIP_STEPS(PF_SKIP_DIR);
 		if (cur->af != prev->af)
 			PF_SET_SKIP_STEPS(PF_SKIP_AF);
 		if (cur->proto != prev->proto)
 			PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
 		if (cur->src.neg != prev->src.neg ||
 		    pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
 			PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
 		if (cur->src.port[0] != prev->src.port[0] ||
 		    cur->src.port[1] != prev->src.port[1] ||
 		    cur->src.port_op != prev->src.port_op)
 			PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
 		if (cur->dst.neg != prev->dst.neg ||
 		    pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
 			PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
 		if (cur->dst.port[0] != prev->dst.port[0] ||
 		    cur->dst.port[1] != prev->dst.port[1] ||
 		    cur->dst.port_op != prev->dst.port_op)
 			PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
 
 		prev = cur;
 		cur = TAILQ_NEXT(cur, entries);
 	}
 	/* cur is NULL here: the runs still open get NULL skip pointers. */
 	for (i = 0; i < PF_SKIP_COUNT; ++i)
 		PF_SET_SKIP_STEPS(i);
 }
 
 /*
  * Compare two address wrappers.  Returns 0 when they are considered equal
  * and non-zero when they differ.  Wrappers of different types always
  * differ; an unknown type is reported with printf() and treated as
  * different.
  */
 static int
 pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
 {
 	if (aw1->type != aw2->type)
 		return (1);
 
 	switch (aw1->type) {
 	case PF_ADDR_NOROUTE:
 	case PF_ADDR_URPFFAILED:
 		/* These types carry no payload to compare. */
 		return (0);
 	case PF_ADDR_TABLE:
 		return (aw1->p.tbl != aw2->p.tbl);
 	case PF_ADDR_DYNIFTL:
 		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
 	case PF_ADDR_ADDRMASK:
 	case PF_ADDR_RANGE:
 		/* Both the address and the mask words must agree. */
 		if ((PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) ||
 		    (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)))
 			return (1);
 		return (0);
 	default:
 		printf("invalid address type: %d\n", aw1->type);
 		return (1);
 	}
 }
 
 /**
  * Checksum updates are a little complicated because the checksum in the
  * TCP/UDP header isn't always a full checksum. In some cases (i.e. output)
  * it's a pseudo-header checksum, which is a partial checksum over src/dst IP
  * addresses, protocol number and length.
  *
  * That leaves the following cases:
  *  * Input or forwarding: there is no TSO, the checksum fields are full
  *  	checksums and must be updated whenever anything changes.
  *  * Output (i.e. the checksum is a pseudo-header checksum):
  *  	x The field being updated is a src/dst address or affects the length
  *  	of the packet: the pseudo-header checksum must be updated (note that
  *  	this checksum is not ones' complement).
  *  	x Some other field is being modified (e.g. src/dst port numbers):
  *  	nothing has to be updated.
  *
  * pf_cksum_fixup() itself performs one incremental step: it returns cksum
  * adjusted for a 16-bit word changing from old to new.  With udp set, an
  * input checksum of 0 is returned unchanged and a computed result of 0 is
  * returned as 0xFFFF.
  **/
 u_int16_t
 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
 {
 	u_int32_t	sum;
 
 	if (udp && cksum == 0)
 		return (0x0000);
 
 	sum = cksum + old - new;
 	/* Fold the upper half back in once and keep the low 16 bits. */
 	sum = ((sum >> 16) + (sum & 0xffff)) & 0xffff;
 
 	if (udp && sum == 0)
 		return (0xFFFF);
 	return (sum);
 }
 
 /*
  * Protocol (TCP/UDP) checksum variant of pf_cksum_fixup().  When the mbuf
  * has CSUM_DELAY_DATA or CSUM_DELAY_DATA_IPV6 set, the checksum is returned
  * untouched; otherwise the regular incremental fixup is applied.
  */
 u_int16_t
 pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
         u_int16_t new, u_int8_t udp)
 {
 	if ((m->m_pkthdr.csum_flags &
 	    (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) == 0)
 		return (pf_cksum_fixup(cksum, old, new, udp));
 
 	return (cksum);
 }
 
 /*
  * Replace the address *a with *an and the port *p with pn, and patch the
  * checksums that cover them.
  *
  * ic: IP header checksum; only touched for AF_INET.
  * pc: protocol checksum; adjusted for the address words and, through
  *     pf_proto_cksum_fixup(), for the port.
  * u:  udp flag handed to the checksum helpers.
  *
  * When the mbuf has CSUM_DELAY_DATA or CSUM_DELAY_DATA_IPV6 set, *pc is
  * complemented before and after the update, and a final value of 0 is
  * stored as 0xffff.
  */
 static void
 pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic,
         u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u,
         sa_family_t af)
 {
 	struct pf_addr	old_addr;
 	const u_int16_t	old_port = *p;
 	const int	delayed = (m->m_pkthdr.csum_flags &
 	    (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) != 0;
 	int		nwords = 0, i;
 
 	PF_ACPY(&old_addr, a, af);
 	PF_ACPY(a, an, af);
 
 	if (delayed)
 		*pc = ~*pc;
 
 	*p = pn;
 
 	/* Number of 16-bit address words that feed the checksums. */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		nwords = 2;
 		for (i = 0; i < nwords; i++)
 			*ic = pf_cksum_fixup(*ic, old_addr.addr16[i],
 			    an->addr16[i], 0);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		nwords = 8;
 		break;
 #endif /* INET6 */
 	}
 
 	if (nwords != 0) {
 		for (i = 0; i < nwords; i++)
 			*pc = pf_cksum_fixup(*pc, old_addr.addr16[i],
 			    an->addr16[i], u);
 		*pc = pf_proto_cksum_fixup(m, *pc, old_port, pn, u);
 	}
 
 	if (delayed) {
 		*pc = ~*pc;
 		if (*pc == 0)
 			*pc = 0xffff;
 	}
 }
 
 /*
  * Overwrite the 32-bit value at a with an and adjust the checksum *c for
  * both 16-bit halves.  The value is accessed through memcpy() only, so a
  * carries no alignment requirement.
  */
 void
 pf_change_a(void *a, u_int16_t *c, u_int32_t an, u_int8_t u)
 {
 	u_int32_t	prev;
 
 	memcpy(&prev, a, sizeof(prev));
 	memcpy(a, &an, sizeof(an));
 
 	/* Upper half first, then lower half. */
 	*c = pf_cksum_fixup(*c, prev >> 16, an >> 16, u);
 	*c = pf_cksum_fixup(*c, prev & 0xffff, an & 0xffff, u);
 }
 
 /*
  * Same as pf_change_a(), but for a value covered by a protocol checksum:
  * both halves go through pf_proto_cksum_fixup(), which receives the mbuf.
  */
 void
 pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
 {
 	u_int32_t	prev;
 
 	memcpy(&prev, a, sizeof(prev));
 	memcpy(a, &an, sizeof(an));
 
 	/* Upper half first, then lower half. */
 	*c = pf_proto_cksum_fixup(m, *c, prev >> 16, an >> 16, udp);
 	*c = pf_proto_cksum_fixup(m, *c, prev & 0xffff, an & 0xffff, udp);
 }
 
 #ifdef INET6
 /*
  * Replace the IPv6 address *a with *an and adjust the checksum *c for each
  * of the eight 16-bit address words, in ascending word order.
  */
 static void
 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
 {
 	struct pf_addr	prev;
 	u_int16_t	sum;
 	int		i;
 
 	PF_ACPY(&prev, a, AF_INET6);
 	PF_ACPY(a, an, AF_INET6);
 
 	sum = *c;
 	for (i = 0; i < 8; i++)
 		sum = pf_cksum_fixup(sum, prev.addr16[i], an->addr16[i], u);
 	*c = sum;
 }
 #endif /* INET6 */
 
 /*
  * Rewrite the address (and optionally the port) of a packet quoted inside
  * an ICMP message and patch every checksum that covers the change.
  *
  * ia:  inner (quoted) address to replace.
  * ip:  inner protocol port to replace with np; NULL leaves the port alone.
  * oa:  outer address that is also replaced with *na; NULL skips that step.
  * na:  new address.
  * np:  new port.
  * pc:  inner protocol checksum; may be NULL.
  * h2c: inner IP header checksum (used for AF_INET).
  * ic:  ICMP checksum.
  * hc:  outer IP header checksum (used for AF_INET when oa is set).
  * u:   udp flag handed to pf_cksum_fixup().
  * af:  address family of the addresses.
  */
 static void
 pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
     struct pf_addr *na, u_int16_t np, u_int16_t *pc, u_int16_t *h2c,
     u_int16_t *ic, u_int16_t *hc, u_int8_t u, sa_family_t af)
 {
 	struct pf_addr	oia, ooa;
 
 	/* Keep the old values; the fixups need old and new words. */
 	PF_ACPY(&oia, ia, af);
 	if (oa)
 		PF_ACPY(&ooa, oa, af);
 
 	/* Change inner protocol port, fix inner protocol checksum. */
 	if (ip != NULL) {
 		u_int16_t	oip = *ip;
 		u_int32_t	opc;
 
 		if (pc != NULL)
 			opc = *pc;
 		*ip = np;
 		if (pc != NULL)
 			*pc = pf_cksum_fixup(*pc, oip, *ip, u);
 		/* The ICMP checksum covers the port ... */
 		*ic = pf_cksum_fixup(*ic, oip, *ip, 0);
 		/* ... and the inner checksum field that was just changed. */
 		if (pc != NULL)
 			*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
 	}
 	/* Change inner ip address, fix inner ip and icmp checksums. */
 	PF_ACPY(ia, na, af);
 	switch (af) {
 #ifdef INET
 	case AF_INET: {
 		u_int32_t	 oh2c = *h2c;
 
 		*h2c = pf_cksum_fixup(pf_cksum_fixup(*h2c,
 		    oia.addr16[0], ia->addr16[0], 0),
 		    oia.addr16[1], ia->addr16[1], 0);
 		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
 		    oia.addr16[0], ia->addr16[0], 0),
 		    oia.addr16[1], ia->addr16[1], 0);
 		/* The inner header checksum field itself changed as well. */
 		*ic = pf_cksum_fixup(*ic, oh2c, *h2c, 0);
 		break;
 	}
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(*ic,
 		    oia.addr16[0], ia->addr16[0], u),
 		    oia.addr16[1], ia->addr16[1], u),
 		    oia.addr16[2], ia->addr16[2], u),
 		    oia.addr16[3], ia->addr16[3], u),
 		    oia.addr16[4], ia->addr16[4], u),
 		    oia.addr16[5], ia->addr16[5], u),
 		    oia.addr16[6], ia->addr16[6], u),
 		    oia.addr16[7], ia->addr16[7], u);
 		break;
 #endif /* INET6 */
 	}
 	/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
 	if (oa) {
 		PF_ACPY(oa, na, af);
 		switch (af) {
 #ifdef INET
 		case AF_INET:
 			*hc = pf_cksum_fixup(pf_cksum_fixup(*hc,
 			    ooa.addr16[0], oa->addr16[0], 0),
 			    ooa.addr16[1], oa->addr16[1], 0);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			*ic = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 			    pf_cksum_fixup(pf_cksum_fixup(*ic,
 			    ooa.addr16[0], oa->addr16[0], u),
 			    ooa.addr16[1], oa->addr16[1], u),
 			    ooa.addr16[2], oa->addr16[2], u),
 			    ooa.addr16[3], oa->addr16[3], u),
 			    ooa.addr16[4], oa->addr16[4], u),
 			    ooa.addr16[5], oa->addr16[5], u),
 			    ooa.addr16[6], oa->addr16[6], u),
 			    ooa.addr16[7], oa->addr16[7], u);
 			break;
 #endif /* INET6 */
 		}
 	}
 }
 
 
 /*
  * Need to modulate the sequence numbers in the TCP SACK option
  * (credits to Krzysztof Pfaff for report and patch)
  *
  * The TCP options are pulled into a local buffer, every SACK block edge
  * has dst->seqdiff subtracted (with th->th_sum adjusted through
  * pf_change_proto_a()), and the buffer is copied back into the mbuf when
  * at least one SACK option was processed.
  *
  * Returns non-zero when the options were written back, 0 otherwise.
  */
 static int
 pf_modulate_sack(struct mbuf *m, int off, struct pf_pdesc *pd,
     struct tcphdr *th, struct pf_state_peer *dst)
 {
 	/* hlen: option bytes left to parse; thoptlen: total option bytes. */
 	int hlen = (th->th_off << 2) - sizeof(*th), thoptlen = hlen;
 	u_int8_t opts[TCP_MAXOLEN], *opt = opts;
 	int copyback = 0, i, olen;
 	struct sackblk sack;
 
 	/* Smallest SACK option of interest: 2-byte header plus one block. */
 #define	TCPOLEN_SACKLEN	(TCPOLEN_SACK + 2)
 	if (hlen < TCPOLEN_SACKLEN ||
 	    !pf_pull_hdr(m, off + sizeof(*th), opts, hlen, NULL, NULL, pd->af))
 		return 0;
 
 	while (hlen >= TCPOLEN_SACKLEN) {
 		olen = opt[1];
 		switch (*opt) {
 		case TCPOPT_EOL:	/* FALLTHROUGH */
 		case TCPOPT_NOP:
 			/* Single-byte options carry no length field. */
 			opt++;
 			hlen--;
 			break;
 		case TCPOPT_SACK:
 			/* Never trust a length beyond the remaining bytes. */
 			if (olen > hlen)
 				olen = hlen;
 			if (olen >= TCPOLEN_SACKLEN) {
 				for (i = 2; i + TCPOLEN_SACK <= olen;
 				    i += TCPOLEN_SACK) {
 					/* Work on a copy of the block. */
 					memcpy(&sack, &opt[i], sizeof(sack));
 					pf_change_proto_a(m, &sack.start, &th->th_sum,
 					    htonl(ntohl(sack.start) - dst->seqdiff), 0);
 					pf_change_proto_a(m, &sack.end, &th->th_sum,
 					    htonl(ntohl(sack.end) - dst->seqdiff), 0);
 					memcpy(&opt[i], &sack, sizeof(sack));
 				}
 				copyback = 1;
 			}
 			/* FALLTHROUGH */
 		default:
 			/* Advance by at least 2 so the loop always progresses. */
 			if (olen < 2)
 				olen = 2;
 			hlen -= olen;
 			opt += olen;
 		}
 	}
 
 	if (copyback)
 		m_copyback(m, off + sizeof(*th), thoptlen, (caddr_t)opts);
 	return (copyback);
 }
 
 /*
  * Build a bare TCP segment (header plus an optional MSS option, no payload)
  * and hand it to pf_send() for transmission.
  *
  * replyto:      not referenced in this function body.
  * r:            rule supplying rtableid and, under ALTQ, qid; may be NULL.
  * af:           AF_INET or AF_INET6; any other value panics.
  * saddr/daddr:  source and destination addresses for the new packet.
  * sport/dport:  ports, stored into the TCP header as given.
  * seq/ack:      sequence and acknowledgment numbers; htonl() is applied.
  * flags:        TCP flags.
  * win:          window; htons() is applied.
  * mss:          when non-zero, a 4-byte MSS option with this value is added.
  * ttl:          IPv4 TTL; 0 selects V_ip_defttl.
  * tag:          when non-zero, M_SKIP_FIREWALL is set on the mbuf.
  * rtag:         value stored in the pf mbuf tag.
  * ifp:          not referenced in this function body.
  *
  * All allocations use M_NOWAIT; on failure the function returns silently
  * after releasing what it allocated so far.
  */
 static void
 pf_send_tcp(struct mbuf *replyto, const struct pf_rule *r, sa_family_t af,
     const struct pf_addr *saddr, const struct pf_addr *daddr,
     u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
     u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
     u_int16_t rtag, struct ifnet *ifp)
 {
 	struct pf_send_entry *pfse;
 	struct mbuf	*m;
 	int		 len, tlen;
 #ifdef INET
 	struct ip	*h = NULL;
 #endif /* INET */
 #ifdef INET6
 	struct ip6_hdr	*h6 = NULL;
 #endif /* INET6 */
 	struct tcphdr	*th;
 	char		*opt;
 	struct pf_mtag  *pf_mtag;
 
 	len = 0;
 	th = NULL;
 
 	/* maximum segment size tcp option */
 	tlen = sizeof(struct tcphdr);
 	if (mss)
 		tlen += 4;
 
 	/* len: full packet length, IP header included. */
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		len = sizeof(struct ip) + tlen;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		len = sizeof(struct ip6_hdr) + tlen;
 		break;
 #endif /* INET6 */
 	default:
 		panic("%s: unsupported af %d", __func__, af);
 	}
 
 	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
 	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
 	if (pfse == NULL)
 		return;
 	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		free(pfse, M_PFTEMP);
 		return;
 	}
 #ifdef MAC
 	mac_netinet_firewall_send(m);
 #endif
 	if ((pf_mtag = pf_get_mtag(m)) == NULL) {
 		free(pfse, M_PFTEMP);
 		m_freem(m);
 		return;
 	}
 	if (tag)
 		m->m_flags |= M_SKIP_FIREWALL;
 	pf_mtag->tag = rtag;
 
 	if (r != NULL && r->rtableid >= 0)
 		M_SETFIB(m, r->rtableid);
 
 #ifdef ALTQ
 	if (r != NULL && r->qid) {
 		pf_mtag->qid = r->qid;
 
 		/* add hints for ecn */
 		pf_mtag->hdr = mtod(m, struct ip *);
 	}
 #endif /* ALTQ */
 	/* Leave max_linkhdr bytes in front of the packet data. */
 	m->m_data += max_linkhdr;
 	m->m_pkthdr.len = m->m_len = len;
 	m->m_pkthdr.rcvif = NULL;
 	bzero(m->m_data, len);
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		h = mtod(m, struct ip *);
 
 		/* IP header fields included in the TCP checksum */
 		h->ip_p = IPPROTO_TCP;
 		h->ip_len = htons(tlen);
 		h->ip_src.s_addr = saddr->v4.s_addr;
 		h->ip_dst.s_addr = daddr->v4.s_addr;
 
 		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		h6 = mtod(m, struct ip6_hdr *);
 
 		/* IP header fields included in the TCP checksum */
 		h6->ip6_nxt = IPPROTO_TCP;
 		h6->ip6_plen = htons(tlen);
 		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
 		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
 
 		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
 		break;
 #endif /* INET6 */
 	}
 
 	/* TCP header */
 	th->th_sport = sport;
 	th->th_dport = dport;
 	th->th_seq = htonl(seq);
 	th->th_ack = htonl(ack);
 	th->th_off = tlen >> 2;
 	th->th_flags = flags;
 	th->th_win = htons(win);
 
 	if (mss) {
 		/* MSS option: kind, length 4, value in network byte order. */
 		opt = (char *)(th + 1);
 		opt[0] = TCPOPT_MAXSEG;
 		opt[1] = 4;
 		HTONS(mss);
 		bcopy((caddr_t)&mss, (caddr_t)(opt + 2), 2);
 	}
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		/* TCP checksum */
 		/*
 		 * Computed over the whole mbuf while the IP header still
 		 * holds only the fields set above (the rest is zero from
 		 * the bzero()); the remaining IP fields are filled in after.
 		 */
 		th->th_sum = in_cksum(m, len);
 
 		/* Finish the IP header */
 		h->ip_v = 4;
 		h->ip_hl = sizeof(*h) >> 2;
 		h->ip_tos = IPTOS_LOWDELAY;
 		h->ip_off = htons(V_path_mtu_discovery ? IP_DF : 0);
 		h->ip_len = htons(len);
 		h->ip_ttl = ttl ? ttl : V_ip_defttl;
 		h->ip_sum = 0;
 
 		pfse->pfse_type = PFSE_IP;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		/* TCP checksum */
 		th->th_sum = in6_cksum(m, IPPROTO_TCP,
 		    sizeof(struct ip6_hdr), tlen);
 
 		h6->ip6_vfc |= IPV6_VERSION;
 		h6->ip6_hlim = IPV6_DEFHLIM;
 
 		pfse->pfse_type = PFSE_IP6;
 		break;
 #endif /* INET6 */
 	}
 	/* The queue entry now references the mbuf; pass both to pf_send(). */
 	pfse->pfse_m = m;
 	pf_send(pfse);
 }
 
 /*
  * Queue an ICMP/ICMPv6 error of the given type and code in response to
  * packet m.
  *
  * m:    offending packet; it is copied with m_copypacket(), the caller
  *       keeps ownership of m itself.
  * type: ICMP type stored in the send entry.
  * code: ICMP code stored in the send entry.
  * af:   AF_INET selects PFSE_ICMP, AF_INET6 selects PFSE_ICMP6.
  * r:    rule supplying rtableid and, under ALTQ, qid; dereferenced
  *       unconditionally, so it must not be NULL.
  *
  * All allocations use M_NOWAIT; on failure the function returns silently
  * after releasing everything it allocated so far.
  */
 static void
 pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, sa_family_t af,
     struct pf_rule *r)
 {
 	struct pf_send_entry *pfse;
 	struct mbuf *m0;
 	struct pf_mtag *pf_mtag;
 
 	/* Allocate outgoing queue entry, mbuf and mbuf tag. */
 	pfse = malloc(sizeof(*pfse), M_PFTEMP, M_NOWAIT);
 	if (pfse == NULL)
 		return;
 
 	if ((m0 = m_copypacket(m, M_NOWAIT)) == NULL) {
 		free(pfse, M_PFTEMP);
 		return;
 	}
 
 	if ((pf_mtag = pf_get_mtag(m0)) == NULL) {
 		/*
 		 * m0 is our private copy and has not been handed to anyone
 		 * yet, so it must be released here as well, just like
 		 * pf_send_tcp() does on the same failure.
 		 */
 		free(pfse, M_PFTEMP);
 		m_freem(m0);
 		return;
 	}
 	/* XXX: revisit */
 	m0->m_flags |= M_SKIP_FIREWALL;
 
 	if (r->rtableid >= 0)
 		M_SETFIB(m0, r->rtableid);
 
 #ifdef ALTQ
 	if (r->qid) {
 		pf_mtag->qid = r->qid;
 		/* add hints for ecn */
 		pf_mtag->hdr = mtod(m0, struct ip *);
 	}
 #endif /* ALTQ */
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		pfse->pfse_type = PFSE_ICMP;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		pfse->pfse_type = PFSE_ICMP6;
 		break;
 #endif /* INET6 */
 	}
 	/* The queue entry now references the copy; pass both to pf_send(). */
 	pfse->pfse_m = m0;
 	pfse->icmpopts.type = type;
 	pfse->icmpopts.code = code;
 	pf_send(pfse);
 }
 
 /*
  * Masked address comparison with optional negation.
  *
  * a and b are compared under mask m.  With n == 0 the result is 1 when the
  * masked addresses are equal and 0 otherwise; with n != 0 the result is
  * inverted.  An address family without a case below never counts as equal.
  */
 int
 pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
     struct pf_addr *b, sa_family_t af)
 {
 	int	equal = 0;
 #ifdef INET6
 	int	i;
 #endif /* INET6 */
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		/* Equal under the mask iff no masked bit differs. */
 		equal = ((a->addr32[0] ^ b->addr32[0]) & m->addr32[0]) == 0;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		equal = 1;
 		for (i = 0; i < 4; i++) {
 			if (((a->addr32[i] ^ b->addr32[i]) &
 			    m->addr32[i]) != 0) {
 				equal = 0;
 				break;
 			}
 		}
 		break;
 #endif /* INET6 */
 	}
 
 	return (n ? !equal : equal);
 }
 
 /*
  * Return 1 if b <= a <= e, otherwise return 0.
  *
  * b:  lower bound of the range (inclusive).
  * e:  upper bound of the range (inclusive).
  * a:  address to test.
  * af: address family; a family without a case below always yields 1.
  *
  * The address words are held in network byte order (the other helpers in
  * this file convert them with ntohl()/htonl() before doing arithmetic), so
  * every word is converted with ntohl() before it is compared.  Comparing
  * the raw words would give the wrong ordering on little-endian hosts.
  */
 int
 pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
     struct pf_addr *a, sa_family_t af)
 {
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
 		    (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
 			return (0);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6: {
 		int	i;
 
 		/*
 		 * check a >= b: compare from the most significant word; the
 		 * first word that differs decides.
 		 */
 		for (i = 0; i < 4; ++i) {
 			if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
 				break;
 			else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
 				return (0);
 		}
 		/* check a <= e, same scheme */
 		for (i = 0; i < 4; ++i) {
 			if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
 				break;
 			else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
 				return (0);
 		}
 		break;
 	}
 #endif /* INET6 */
 	}
 	return (1);
 }
 
 /*
  * Evaluate the comparison operator op on value p.
  *
  * a1 is the sole operand for the unary comparisons and the lower bound for
  * the range operators; a2 is the upper bound and only used by the range
  * operators.  Returns 1 when the comparison holds, 0 when it does not or
  * when op is not one of the handled operators.
  */
 static int
 pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
 {
 	int	hit = 0;
 
 	switch (op) {
 	case PF_OP_EQ:
 		hit = (p == a1);
 		break;
 	case PF_OP_NE:
 		hit = (p != a1);
 		break;
 	case PF_OP_LT:
 		hit = (p < a1);
 		break;
 	case PF_OP_LE:
 		hit = (p <= a1);
 		break;
 	case PF_OP_GT:
 		hit = (p > a1);
 		break;
 	case PF_OP_GE:
 		hit = (p >= a1);
 		break;
 	case PF_OP_IRG:		/* strictly inside (a1, a2) */
 		hit = (p > a1) && (p < a2);
 		break;
 	case PF_OP_XRG:		/* strictly outside [a1, a2] */
 		hit = (p < a1) || (p > a2);
 		break;
 	case PF_OP_RRG:		/* inside [a1, a2], bounds included */
 		hit = (p >= a1) && (p <= a2);
 		break;
 	}
 	return (hit);
 }
 
 /*
  * Port flavour of pf_match(): a1, a2 and p are converted with ntohs()
  * before the operator op is evaluated on them.
  */
 int
 pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
 {
 	const u_int16_t	lo = ntohs(a1);
 	const u_int16_t	hi = ntohs(a2);
 	const u_int16_t	port = ntohs(p);
 
 	return (pf_match(op, lo, hi, port));
 }
 
 /*
  * User id flavour of pf_match().  A uid of UID_MAX can only be tested with
  * PF_OP_EQ or PF_OP_NE; every other operator yields 0 for it.
  */
 static int
 pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
 {
 	if (u != UID_MAX || op == PF_OP_EQ || op == PF_OP_NE)
 		return (pf_match(op, a1, a2, u));
 	return (0);
 }
 
 /*
  * Group id flavour of pf_match().  A gid of GID_MAX can only be tested with
  * PF_OP_EQ or PF_OP_NE; every other operator yields 0 for it.
  */
 static int
 pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
 {
 	if (g != GID_MAX || op == PF_OP_EQ || op == PF_OP_NE)
 		return (pf_match(op, a1, a2, g));
 	return (0);
 }
 
 /*
  * Test a rule's tag condition.  If *tag is still -1 it is first loaded
  * from mtag.  Returns 1 when r->match_tag equals *tag, or, with
  * r->match_tag_not set, when it does not; 0 otherwise.
  */
 int
 pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag, int mtag)
 {
 	int	same;
 
 	if (*tag == -1)
 		*tag = mtag;
 
 	same = (r->match_tag == *tag);
 	return (r->match_tag_not ? !same : same);
 }
 
 /*
  * Store tag in the packet's pf mbuf tag, obtaining the mbuf tag through
  * pf_get_mtag() first when pd does not reference one yet.
  *
  * Returns 0 on success, or ENOMEM when no mbuf tag could be obtained.
  */
 int
 pf_tag_packet(struct mbuf *m, struct pf_pdesc *pd, int tag)
 {
 
 	KASSERT(tag > 0, ("%s: tag %d", __func__, tag));
 
 	if (pd->pf_mtag == NULL) {
 		pd->pf_mtag = pf_get_mtag(m);
 		if (pd->pf_mtag == NULL)
 			return (ENOMEM);
 	}
 
 	pd->pf_mtag->tag = tag;
 
 	return (0);
 }
 
 /* Depth limit checked by pf_step_into_anchor() before pushing a frame. */
 #define	PF_ANCHOR_STACKSIZE	32
 /*
  * One frame per anchor entered during rule evaluation: the ruleset and
  * anchor rule to return to, and the current child while a wildcard anchor
  * is being walked (NULL otherwise).
  */
 struct pf_anchor_stackframe {
 	struct pf_ruleset	*rs;
 	struct pf_rule		*r;	/* XXX: + match bit */
 	struct pf_anchor	*child;
 };
 
 /*
  * XXX: We rely on malloc(9) returning pointer aligned addresses.
  *
  * The lowest bit of the frame's rule pointer doubles as a "match" flag,
  * which only works while that bit is always zero in a real rule pointer.
  */
 #define	PF_ANCHORSTACK_MATCH	0x00000001
 #define	PF_ANCHORSTACK_MASK	(PF_ANCHORSTACK_MATCH)
 
 /* Non-zero when the frame's match flag is set. */
 #define	PF_ANCHOR_MATCH(f)	((uintptr_t)(f)->r & PF_ANCHORSTACK_MATCH)
 /* The frame's rule pointer with the flag bit masked off. */
 #define	PF_ANCHOR_RULE(f)	(struct pf_rule *)			\
 				((uintptr_t)(f)->r & ~PF_ANCHORSTACK_MASK)
 /* Set the frame's match flag in place. */
 #define	PF_ANCHOR_SET_MATCH(f)	do { (f)->r = (void *) 			\
 				((uintptr_t)(f)->r | PF_ANCHORSTACK_MATCH);  \
 } while (0)
 
 /*
  * Descend into the anchor referenced by rule *r.
  *
  * stack/depth: anchor stack and its current depth; one frame is pushed.
  * rs:          in: current ruleset; out: ruleset of the anchor (or of its
  *              first child for a wildcard anchor).
  * n:           index of the rule queue to use inside the ruleset.
  * r:           in: the anchor rule; out: first active rule of the new
  *              ruleset, or NULL when a wildcard anchor has no children.
  * a:           if non-NULL and this is the outermost anchor, receives the
  *              anchor rule.
  * match:       if non-NULL, reset to 0.
  *
  * When the stack is already full, a message is printed and *r simply
  * advances to the next rule without entering the anchor.
  */
 void
 pf_step_into_anchor(struct pf_anchor_stackframe *stack, int *depth,
     struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a,
     int *match)
 {
 	struct pf_anchor_stackframe	*f;
 
 	PF_RULES_RASSERT();
 
 	if (match)
 		*match = 0;
 	if (*depth >= PF_ANCHOR_STACKSIZE) {
 		printf("%s: anchor stack overflow on %s\n",
 		    __func__, (*r)->anchor->name);
 		*r = TAILQ_NEXT(*r, entries);
 		return;
 	} else if (*depth == 0 && a != NULL)
 		*a = *r;
 	/* Push a frame remembering where to resume afterwards. */
 	f = stack + (*depth)++;
 	f->rs = *rs;
 	f->r = *r;
 	if ((*r)->anchor_wildcard) {
 		struct pf_anchor_node *parent = &(*r)->anchor->children;
 
 		/* Wildcard: start with the first child, if there is one. */
 		if ((f->child = RB_MIN(pf_anchor_node, parent)) == NULL) {
 			*r = NULL;
 			return;
 		}
 		*rs = &f->child->ruleset;
 	} else {
 		f->child = NULL;
 		*rs = &(*r)->anchor->ruleset;
 	}
 	*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
 }
 
 /*
  * Continue evaluation after the end of an anchor's ruleset was reached.
  *
  * For a wildcard anchor the next child is entered if there is one;
  * otherwise the top frame is popped and evaluation resumes at the rule
  * following the anchor rule.  This repeats while the resulting *r is NULL
  * and frames remain on the stack.
  *
  * stack/depth: anchor stack and its current depth.
  * rs, r:       updated to the ruleset and rule to continue with.
  * n:           index of the rule queue to use inside a ruleset.
  * a:           if non-NULL, set to NULL when the outermost anchor is left.
  * match:       if non-NULL, match indicator for the ruleset just finished.
  *
  * Returns the "quick" setting of the last anchor rule popped for which a
  * match was recorded, or 0 if there was none.
  */
 int
 pf_step_out_of_anchor(struct pf_anchor_stackframe *stack, int *depth,
     struct pf_ruleset **rs, int n, struct pf_rule **r, struct pf_rule **a,
     int *match)
 {
 	struct pf_anchor_stackframe	*f;
 	struct pf_rule *fr;
 	int quick = 0;
 
 	PF_RULES_RASSERT();
 
 	do {
 		if (*depth <= 0)
 			break;
 		f = stack + *depth - 1;
 		/* fr: the anchor rule of the top frame, flag bit removed. */
 		fr = PF_ANCHOR_RULE(f);
 		if (f->child != NULL) {
 			struct pf_anchor_node *parent;
 
 			/*
 			 * This block traverses through
 			 * a wildcard anchor.
 			 */
 			parent = &fr->anchor->children;
 			if (match != NULL && *match) {
 				/*
 				 * If any of "*" matched, then
 				 * "foo/ *" matched, mark frame
 				 * appropriately.
 				 */
 				PF_ANCHOR_SET_MATCH(f);
 				*match = 0;
 			}
 			f->child = RB_NEXT(pf_anchor_node, parent, f->child);
 			if (f->child != NULL) {
 				*rs = &f->child->ruleset;
 				*r = TAILQ_FIRST((*rs)->rules[n].active.ptr);
 				/*
 				 * An empty child ruleset re-enters the loop
 				 * (the while condition holds since *r is
 				 * NULL) and moves on to the next child.
 				 */
 				if (*r == NULL)
 					continue;
 				else
 					break;
 			}
 		}
 		/* Pop the frame and resume after the anchor rule. */
 		(*depth)--;
 		if (*depth == 0 && a != NULL)
 			*a = NULL;
 		*rs = f->rs;
 		if (PF_ANCHOR_MATCH(f) || (match != NULL && *match))
 			quick = fr->quick;
 		*r = TAILQ_NEXT(fr, entries);
 	} while (*r == NULL);
 
 	return (quick);
 }
 
 #ifdef INET6
 /*
  * Combine a pool address with a source address under a mask: bits set in
  * rmask are taken from raddr, all remaining bits from saddr.  The result
  * is stored in naddr (one word for AF_INET, four for AF_INET6).
  */
 void
 pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
     struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
 {
 	int	i;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
 		    (~rmask->addr32[0] & saddr->addr32[0]);
 		break;
 #endif /* INET */
 	case AF_INET6:
 		for (i = 0; i < 4; i++)
 			naddr->addr32[i] =
 			    (raddr->addr32[i] & rmask->addr32[i]) |
 			    (~rmask->addr32[i] & saddr->addr32[i]);
 		break;
 	}
 }
 
 /*
  * Increment an address by one, in place.  For AF_INET6 the carry moves
  * from the last 32-bit word towards the first: trailing words that are
  * 0xffffffff wrap to 0 and the next word up is incremented.
  */
 void
 pf_addr_inc(struct pf_addr *addr, sa_family_t af)
 {
 	int	i;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
 		break;
 #endif /* INET */
 	case AF_INET6:
 		/* Clear saturated low-order words; stop at word 0 at most. */
 		for (i = 3; i > 0; i--) {
 			if (addr->addr32[i] != 0xffffffff)
 				break;
 			addr->addr32[i] = 0;
 		}
 		addr->addr32[i] = htonl(ntohl(addr->addr32[i]) + 1);
 		break;
 	}
 }
 #endif /* INET6 */
 
 /*
  * Look up the local socket (inpcb) that belongs to a TCP or UDP packet and
  * record its owner's credentials in pd->lookup.
  *
  * direction: PF_IN uses the packet's source/destination as they are; any
  *            other value swaps addresses and ports for the lookup.
  * pd:        packet descriptor; lookup.uid/gid are preset to UID_MAX and
  *            GID_MAX and overwritten on success.
  * m:         the packet's mbuf, passed on to the pcb lookup.
  *
  * Returns 1 when a socket was found, -1 otherwise (protocol not TCP/UDP,
  * missing header, unsupported address family, or no matching pcb).
  */
 int
 pf_socket_lookup(int direction, struct pf_pdesc *pd, struct mbuf *m)
 {
 	struct pf_addr		*saddr, *daddr;
 	u_int16_t		 sport, dport;
 	struct inpcbinfo	*pi;
 	struct inpcb		*inp;
 
 	pd->lookup.uid = UID_MAX;
 	pd->lookup.gid = GID_MAX;
 
 	/* Pick ports and the pcb database according to the protocol. */
 	switch (pd->proto) {
 	case IPPROTO_TCP:
 		if (pd->hdr.tcp == NULL)
 			return (-1);
 		sport = pd->hdr.tcp->th_sport;
 		dport = pd->hdr.tcp->th_dport;
 		pi = &V_tcbinfo;
 		break;
 	case IPPROTO_UDP:
 		if (pd->hdr.udp == NULL)
 			return (-1);
 		sport = pd->hdr.udp->uh_sport;
 		dport = pd->hdr.udp->uh_dport;
 		pi = &V_udbinfo;
 		break;
 	default:
 		return (-1);
 	}
 	if (direction == PF_IN) {
 		saddr = pd->src;
 		daddr = pd->dst;
 	} else {
 		u_int16_t	p;
 
 		/* Swap both the ports and the addresses. */
 		p = sport;
 		sport = dport;
 		dport = p;
 		saddr = pd->dst;
 		daddr = pd->src;
 	}
 	/* Try an exact match first, then retry with INPLOOKUP_WILDCARD. */
 	switch (pd->af) {
 #ifdef INET
 	case AF_INET:
 		inp = in_pcblookup_mbuf(pi, saddr->v4, sport, daddr->v4,
 		    dport, INPLOOKUP_RLOCKPCB, NULL, m);
 		if (inp == NULL) {
 			inp = in_pcblookup_mbuf(pi, saddr->v4, sport,
 			   daddr->v4, dport, INPLOOKUP_WILDCARD |
 			   INPLOOKUP_RLOCKPCB, NULL, m);
 			if (inp == NULL)
 				return (-1);
 		}
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport, &daddr->v6,
 		    dport, INPLOOKUP_RLOCKPCB, NULL, m);
 		if (inp == NULL) {
 			inp = in6_pcblookup_mbuf(pi, &saddr->v6, sport,
 			    &daddr->v6, dport, INPLOOKUP_WILDCARD |
 			    INPLOOKUP_RLOCKPCB, NULL, m);
 			if (inp == NULL)
 				return (-1);
 		}
 		break;
 #endif /* INET6 */
 
 	default:
 		return (-1);
 	}
 	/* The pcb is expected to be read-locked here; unlock after use. */
 	INP_RLOCK_ASSERT(inp);
 	pd->lookup.uid = inp->inp_cred->cr_uid;
 	pd->lookup.gid = inp->inp_cred->cr_groups[0];
 	INP_RUNLOCK(inp);
 
 	return (1);
 }
 
 /*
  * Extract the window scale option from a TCP header.
  *
  * m, off: mbuf and offset of the TCP header inside it.
  * th_off: TCP data offset field (header length in 32-bit words).
  * af:     address family, passed on to pf_pull_hdr().
  *
  * Returns 0 when the header carries no options, cannot be pulled, or has
  * no window scale option.  Otherwise returns the shift count, limited to
  * TCP_MAX_WINSHIFT, with PF_WSCALE_FLAG or'ed in.  If the option occurs
  * more than once, the last occurrence wins.
  */
 static u_int8_t
 pf_get_wscale(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
 {
 	int		 hlen;
 	u_int8_t	 hdr[60];
 	u_int8_t	*opt, optlen;
 	u_int8_t	 wscale = 0;
 
 	hlen = th_off << 2;		/* hlen <= sizeof(hdr) */
 	if (hlen <= sizeof(struct tcphdr))
 		return (0);
 	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
 		return (0);
 	/* From here on hlen counts the option bytes still to be parsed. */
 	opt = hdr + sizeof(struct tcphdr);
 	hlen -= sizeof(struct tcphdr);
 	while (hlen >= 3) {
 		switch (*opt) {
 		case TCPOPT_EOL:
 		case TCPOPT_NOP:
 			/* Single-byte options carry no length field. */
 			++opt;
 			--hlen;
 			break;
 		case TCPOPT_WINDOW:
 			wscale = opt[2];
 			if (wscale > TCP_MAX_WINSHIFT)
 				wscale = TCP_MAX_WINSHIFT;
 			wscale |= PF_WSCALE_FLAG;
 			/* FALLTHROUGH */
 		default:
 			/* Advance by at least 2 so the loop always progresses. */
 			optlen = opt[1];
 			if (optlen < 2)
 				optlen = 2;
 			hlen -= optlen;
 			opt += optlen;
 			break;
 		}
 	}
 	return (wscale);
 }
 
 /*
  * Extract the maximum segment size option from a TCP header.
  *
  * m, off: mbuf and offset of the TCP header inside it.
  * th_off: TCP data offset field (header length in 32-bit words).
  * af:     address family, passed on to pf_pull_hdr().
  *
  * Returns 0 when the header carries no options or cannot be pulled.
  * Otherwise returns the MSS option value in host byte order, or
  * V_tcp_mssdflt when no MSS option is present.  If the option occurs more
  * than once, the last occurrence wins.
  */
 static u_int16_t
 pf_get_mss(struct mbuf *m, int off, u_int16_t th_off, sa_family_t af)
 {
 	int		 hlen;
 	u_int8_t	 hdr[60];
 	u_int8_t	*opt, optlen;
 	u_int16_t	 mss = V_tcp_mssdflt;
 
 	hlen = th_off << 2;	/* hlen <= sizeof(hdr) */
 	if (hlen <= sizeof(struct tcphdr))
 		return (0);
 	if (!pf_pull_hdr(m, off, hdr, hlen, NULL, NULL, af))
 		return (0);
 	/* From here on hlen counts the option bytes still to be parsed. */
 	opt = hdr + sizeof(struct tcphdr);
 	hlen -= sizeof(struct tcphdr);
 	while (hlen >= TCPOLEN_MAXSEG) {
 		switch (*opt) {
 		case TCPOPT_EOL:
 		case TCPOPT_NOP:
 			/* Single-byte options carry no length field. */
 			++opt;
 			--hlen;
 			break;
 		case TCPOPT_MAXSEG:
 			/* bcopy(): the value may be unaligned in hdr. */
 			bcopy((caddr_t)(opt + 2), (caddr_t)&mss, 2);
 			NTOHS(mss);
 			/* FALLTHROUGH */
 		default:
 			/* Advance by at least 2 so the loop always progresses. */
 			optlen = opt[1];
 			if (optlen < 2)
 				optlen = 2;
 			hlen -= optlen;
 			opt += optlen;
 			break;
 		}
 	}
 	return (mss);
 }
 
 /*
  * Compute the MSS to use towards addr.
  *
  * The route's MTU minus the IP and TCP header sizes is used when the
  * lookup in routing table rtableid succeeds.  The value is then raised to
  * at least V_tcp_mssdflt, limited to the peer's offer, and finally raised
  * to at least 64.
  *
  * NOTE(review): this function sits inside a diff hunk; the description
  * applies to both the removed and the added variant of the lookup code.
  */
 static u_int16_t
 pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, u_int16_t offer)
 {
 #ifdef INET
-	struct sockaddr_in	*dst;
-	struct route		 ro;
+	struct nhop4_basic	nh4;
 #endif /* INET */
 #ifdef INET6
-	struct sockaddr_in6	*dst6;
-	struct route_in6	 ro6;
+	struct nhop6_basic	nh6;
+	struct in6_addr		dst6;
+	uint32_t		scopeid;
 #endif /* INET6 */
-	struct rtentry		*rt = NULL;
 	int			 hlen = 0;
-	u_int16_t		 mss = V_tcp_mssdflt;
+	uint16_t		 mss = 0;
 
 	switch (af) {
 #ifdef INET
 	case AF_INET:
 		hlen = sizeof(struct ip);
-		bzero(&ro, sizeof(ro));
-		dst = (struct sockaddr_in *)&ro.ro_dst;
-		dst->sin_family = AF_INET;
-		dst->sin_len = sizeof(*dst);
-		dst->sin_addr = addr->v4;
-		in_rtalloc_ign(&ro, 0, rtableid);
-		rt = ro.ro_rt;
+		if (fib4_lookup_nh_basic(rtableid, addr->v4, 0, 0, &nh4) == 0)
+			mss = nh4.nh_mtu - hlen - sizeof(struct tcphdr);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		hlen = sizeof(struct ip6_hdr);
-		bzero(&ro6, sizeof(ro6));
-		dst6 = (struct sockaddr_in6 *)&ro6.ro_dst;
-		dst6->sin6_family = AF_INET6;
-		dst6->sin6_len = sizeof(*dst6);
-		dst6->sin6_addr = addr->v6;
-		in6_rtalloc_ign(&ro6, 0, rtableid);
-		rt = ro6.ro_rt;
+		in6_splitscope(&addr->v6, &dst6, &scopeid);
+		if (fib6_lookup_nh_basic(rtableid, &dst6, scopeid, 0,0,&nh6)==0)
+			mss = nh6.nh_mtu - hlen - sizeof(struct tcphdr);
 		break;
 #endif /* INET6 */
 	}
 
-	if (rt && rt->rt_ifp) {
-		mss = rt->rt_ifp->if_mtu - hlen - sizeof(struct tcphdr);
-		mss = max(V_tcp_mssdflt, mss);
-		RTFREE(rt);
-	}
+	mss = max(V_tcp_mssdflt, mss);
 	mss = min(mss, offer);
 	mss = max(mss, 64);		/* sanity - at least max opt space */
 	return (mss);
 }
 
 /*
  * Generate an initial sequence number for the TCP connection described by
  * pd.
  *
  * A random secret is read once and pre-hashed into V_pf_tcp_secret_ctx.
  * Each call copies that context, adds the packet's ports and addresses,
  * and returns the first 32-bit word of the MD5 digest plus a random value
  * below 4096 plus V_pf_tcp_iss_off, which advances by 4096 per call.
  */
 static u_int32_t
 pf_tcp_iss(struct pf_pdesc *pd)
 {
 	MD5_CTX ctx;
 	u_int32_t digest[4];
 
 	/* First use: draw the secret and hash it into the base context. */
 	if (V_pf_tcp_secret_init == 0) {
 		read_random(&V_pf_tcp_secret, sizeof(V_pf_tcp_secret));
 		MD5Init(&V_pf_tcp_secret_ctx);
 		MD5Update(&V_pf_tcp_secret_ctx, V_pf_tcp_secret,
 		    sizeof(V_pf_tcp_secret));
 		V_pf_tcp_secret_init = 1;
 	}
 
 	/* Work on a copy so the pre-hashed secret can be reused. */
 	ctx = V_pf_tcp_secret_ctx;
 
 	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_sport, sizeof(u_short));
 	MD5Update(&ctx, (char *)&pd->hdr.tcp->th_dport, sizeof(u_short));
 	if (pd->af == AF_INET6) {
 		MD5Update(&ctx, (char *)&pd->src->v6, sizeof(struct in6_addr));
 		MD5Update(&ctx, (char *)&pd->dst->v6, sizeof(struct in6_addr));
 	} else {
 		MD5Update(&ctx, (char *)&pd->src->v4, sizeof(struct in_addr));
 		MD5Update(&ctx, (char *)&pd->dst->v4, sizeof(struct in_addr));
 	}
 	MD5Final((u_char *)digest, &ctx);
 	V_pf_tcp_iss_off += 4096;
 	/* Mask for the random part: keeps it within one 4096 step. */
 #define	ISN_RANDOM_INCREMENT (4096 - 1)
 	return (digest[0] + (arc4random() & ISN_RANDOM_INCREMENT) +
 	    V_pf_tcp_iss_off);
 #undef	ISN_RANDOM_INCREMENT
 }
 
 static int
 pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction,
     struct pfi_kif *kif, struct mbuf *m, int off, struct pf_pdesc *pd,
     struct pf_rule **am, struct pf_ruleset **rsm, struct inpcb *inp)
 {
 	struct pf_rule		*nr = NULL;
 	struct pf_addr		* const saddr = pd->src;
 	struct pf_addr		* const daddr = pd->dst;
 	sa_family_t		 af = pd->af;
 	struct pf_rule		*r, *a = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_src_node	*nsn = NULL;
 	struct tcphdr		*th = pd->hdr.tcp;
 	struct pf_state_key	*sk = NULL, *nk = NULL;
 	u_short			 reason;
 	int			 rewrite = 0, hdrlen = 0;
 	int			 tag = -1, rtableid = -1;
 	int			 asd = 0;
 	int			 match = 0;
 	int			 state_icmp = 0;
 	u_int16_t		 sport = 0, dport = 0;
 	u_int16_t		 bproto_sum = 0, bip_sum = 0;
 	u_int8_t		 icmptype = 0, icmpcode = 0;
 	struct pf_anchor_stackframe	anchor_stack[PF_ANCHOR_STACKSIZE];
 
 	PF_RULES_RASSERT();
 
 	if (inp != NULL) {
 		INP_LOCK_ASSERT(inp);
 		pd->lookup.uid = inp->inp_cred->cr_uid;
 		pd->lookup.gid = inp->inp_cred->cr_groups[0];
 		pd->lookup.done = 1;
 	}
 
 	switch (pd->proto) {
 	case IPPROTO_TCP:
 		sport = th->th_sport;
 		dport = th->th_dport;
 		hdrlen = sizeof(*th);
 		break;
 	case IPPROTO_UDP:
 		sport = pd->hdr.udp->uh_sport;
 		dport = pd->hdr.udp->uh_dport;
 		hdrlen = sizeof(*pd->hdr.udp);
 		break;
 #ifdef INET
 	case IPPROTO_ICMP:
 		if (pd->af != AF_INET)
 			break;
 		sport = dport = pd->hdr.icmp->icmp_id;
 		hdrlen = sizeof(*pd->hdr.icmp);
 		icmptype = pd->hdr.icmp->icmp_type;
 		icmpcode = pd->hdr.icmp->icmp_code;
 
 		if (icmptype == ICMP_UNREACH ||
 		    icmptype == ICMP_SOURCEQUENCH ||
 		    icmptype == ICMP_REDIRECT ||
 		    icmptype == ICMP_TIMXCEED ||
 		    icmptype == ICMP_PARAMPROB)
 			state_icmp++;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 		if (af != AF_INET6)
 			break;
 		sport = dport = pd->hdr.icmp6->icmp6_id;
 		hdrlen = sizeof(*pd->hdr.icmp6);
 		icmptype = pd->hdr.icmp6->icmp6_type;
 		icmpcode = pd->hdr.icmp6->icmp6_code;
 
 		if (icmptype == ICMP6_DST_UNREACH ||
 		    icmptype == ICMP6_PACKET_TOO_BIG ||
 		    icmptype == ICMP6_TIME_EXCEEDED ||
 		    icmptype == ICMP6_PARAM_PROB)
 			state_icmp++;
 		break;
 #endif /* INET6 */
 	default:
 		sport = dport = hdrlen = 0;
 		break;
 	}
 
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 
 	/* check packet for BINAT/NAT/RDR */
 	if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk,
 	    &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) {
 		KASSERT(sk != NULL, ("%s: null sk", __func__));
 		KASSERT(nk != NULL, ("%s: null nk", __func__));
 
 		if (pd->ip_sum)
 			bip_sum = *pd->ip_sum;
 
 		switch (pd->proto) {
 		case IPPROTO_TCP:
 			bproto_sum = th->th_sum;
 			pd->proto_sum = &th->th_sum;
 
 			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
 			    nk->port[pd->sidx] != sport) {
 				pf_change_ap(m, saddr, &th->th_sport, pd->ip_sum,
 				    &th->th_sum, &nk->addr[pd->sidx],
 				    nk->port[pd->sidx], 0, af);
 				pd->sport = &th->th_sport;
 				sport = th->th_sport;
 			}
 
 			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
 			    nk->port[pd->didx] != dport) {
 				pf_change_ap(m, daddr, &th->th_dport, pd->ip_sum,
 				    &th->th_sum, &nk->addr[pd->didx],
 				    nk->port[pd->didx], 0, af);
 				dport = th->th_dport;
 				pd->dport = &th->th_dport;
 			}
 			rewrite++;
 			break;
 		case IPPROTO_UDP:
 			bproto_sum = pd->hdr.udp->uh_sum;
 			pd->proto_sum = &pd->hdr.udp->uh_sum;
 
 			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
 			    nk->port[pd->sidx] != sport) {
 				pf_change_ap(m, saddr, &pd->hdr.udp->uh_sport,
 				    pd->ip_sum, &pd->hdr.udp->uh_sum,
 				    &nk->addr[pd->sidx],
 				    nk->port[pd->sidx], 1, af);
 				sport = pd->hdr.udp->uh_sport;
 				pd->sport = &pd->hdr.udp->uh_sport;
 			}
 
 			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
 			    nk->port[pd->didx] != dport) {
 				pf_change_ap(m, daddr, &pd->hdr.udp->uh_dport,
 				    pd->ip_sum, &pd->hdr.udp->uh_sum,
 				    &nk->addr[pd->didx],
 				    nk->port[pd->didx], 1, af);
 				dport = pd->hdr.udp->uh_dport;
 				pd->dport = &pd->hdr.udp->uh_dport;
 			}
 			rewrite++;
 			break;
 #ifdef INET
 		case IPPROTO_ICMP:
 			nk->port[0] = nk->port[1];
 			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET))
 				pf_change_a(&saddr->v4.s_addr, pd->ip_sum,
 				    nk->addr[pd->sidx].v4.s_addr, 0);
 
 			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET))
 				pf_change_a(&daddr->v4.s_addr, pd->ip_sum,
 				    nk->addr[pd->didx].v4.s_addr, 0);
 
 			if (nk->port[1] != pd->hdr.icmp->icmp_id) {
 				pd->hdr.icmp->icmp_cksum = pf_cksum_fixup(
 				    pd->hdr.icmp->icmp_cksum, sport,
 				    nk->port[1], 0);
 				pd->hdr.icmp->icmp_id = nk->port[1];
 				pd->sport = &pd->hdr.icmp->icmp_id;
 			}
 			m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
 			break;
 #endif /* INET */
 #ifdef INET6
 		case IPPROTO_ICMPV6:
 			nk->port[0] = nk->port[1];
 			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], AF_INET6))
 				pf_change_a6(saddr, &pd->hdr.icmp6->icmp6_cksum,
 				    &nk->addr[pd->sidx], 0);
 
 			if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6))
 				pf_change_a6(daddr, &pd->hdr.icmp6->icmp6_cksum,
 				    &nk->addr[pd->didx], 0);
 			rewrite++;
 			break;
 #endif /* INET */
 		default:
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				if (PF_ANEQ(saddr,
 				    &nk->addr[pd->sidx], AF_INET))
 					pf_change_a(&saddr->v4.s_addr,
 					    pd->ip_sum,
 					    nk->addr[pd->sidx].v4.s_addr, 0);
 
 				if (PF_ANEQ(daddr,
 				    &nk->addr[pd->didx], AF_INET))
 					pf_change_a(&daddr->v4.s_addr,
 					    pd->ip_sum,
 					    nk->addr[pd->didx].v4.s_addr, 0);
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				if (PF_ANEQ(saddr,
 				    &nk->addr[pd->sidx], AF_INET6))
 					PF_ACPY(saddr, &nk->addr[pd->sidx], af);
 
 				if (PF_ANEQ(daddr,
 				    &nk->addr[pd->didx], AF_INET6))
 					PF_ACPY(saddr, &nk->addr[pd->didx], af);
 				break;
 #endif /* INET */
 			}
 			break;
 		}
 		if (nr->natpass)
 			r = NULL;
 		pd->nat_rule = nr;
 	}
 
 	while (r != NULL) {
 		r->evaluations++;
 		if (pfi_kif_match(r->kif, kif) == r->ifnot)
 			r = r->skip[PF_SKIP_IFP].ptr;
 		else if (r->direction && r->direction != direction)
 			r = r->skip[PF_SKIP_DIR].ptr;
 		else if (r->af && r->af != af)
 			r = r->skip[PF_SKIP_AF].ptr;
 		else if (r->proto && r->proto != pd->proto)
 			r = r->skip[PF_SKIP_PROTO].ptr;
 		else if (PF_MISMATCHAW(&r->src.addr, saddr, af,
 		    r->src.neg, kif, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
 		/* tcp/udp only. port_op always 0 in other cases */
 		else if (r->src.port_op && !pf_match_port(r->src.port_op,
 		    r->src.port[0], r->src.port[1], sport))
 			r = r->skip[PF_SKIP_SRC_PORT].ptr;
 		else if (PF_MISMATCHAW(&r->dst.addr, daddr, af,
 		    r->dst.neg, NULL, M_GETFIB(m)))
 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
 		/* tcp/udp only. port_op always 0 in other cases */
 		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
 		    r->dst.port[0], r->dst.port[1], dport))
 			r = r->skip[PF_SKIP_DST_PORT].ptr;
 		/* icmp only. type always 0 in other cases */
 		else if (r->type && r->type != icmptype + 1)
 			r = TAILQ_NEXT(r, entries);
 		/* icmp only. type always 0 in other cases */
 		else if (r->code && r->code != icmpcode + 1)
 			r = TAILQ_NEXT(r, entries);
 		else if (r->tos && !(r->tos == pd->tos))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->rule_flag & PFRULE_FRAGMENT)
 			r = TAILQ_NEXT(r, entries);
 		else if (pd->proto == IPPROTO_TCP &&
 		    (r->flagset & th->th_flags) != r->flags)
 			r = TAILQ_NEXT(r, entries);
 		/* tcp/udp only. uid.op always 0 in other cases */
 		else if (r->uid.op && (pd->lookup.done || (pd->lookup.done =
 		    pf_socket_lookup(direction, pd, m), 1)) &&
 		    !pf_match_uid(r->uid.op, r->uid.uid[0], r->uid.uid[1],
 		    pd->lookup.uid))
 			r = TAILQ_NEXT(r, entries);
 		/* tcp/udp only. gid.op always 0 in other cases */
 		else if (r->gid.op && (pd->lookup.done || (pd->lookup.done =
 		    pf_socket_lookup(direction, pd, m), 1)) &&
 		    !pf_match_gid(r->gid.op, r->gid.gid[0], r->gid.gid[1],
 		    pd->lookup.gid))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->prob &&
 		    r->prob <= arc4random())
 			r = TAILQ_NEXT(r, entries);
 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
 			r = TAILQ_NEXT(r, entries);
 		else if (r->os_fingerprint != PF_OSFP_ANY &&
 		    (pd->proto != IPPROTO_TCP || !pf_osfp_match(
 		    pf_osfp_fingerprint(pd, m, off, th),
 		    r->os_fingerprint)))
 			r = TAILQ_NEXT(r, entries);
 		else {
 			if (r->tag)
 				tag = r->tag;
 			if (r->rtableid >= 0)
 				rtableid = r->rtableid;
 			if (r->anchor == NULL) {
 				match = 1;
 				*rm = r;
 				*am = a;
 				*rsm = ruleset;
 				if ((*rm)->quick)
 					break;
 				r = TAILQ_NEXT(r, entries);
 			} else
 				pf_step_into_anchor(anchor_stack, &asd,
 				    &ruleset, PF_RULESET_FILTER, &r, &a,
 				    &match);
 		}
 		if (r == NULL && pf_step_out_of_anchor(anchor_stack, &asd,
 		    &ruleset, PF_RULESET_FILTER, &r, &a, &match))
 			break;
 	}
 	r = *rm;
 	a = *am;
 	ruleset = *rsm;
 
 	REASON_SET(&reason, PFRES_MATCH);
 
 	if (r->log || (nr != NULL && nr->log)) {
 		if (rewrite)
 			m_copyback(m, off, hdrlen, pd->hdr.any);
 		PFLOG_PACKET(kif, m, af, direction, reason, r->log ? r : nr, a,
 		    ruleset, pd, 1);
 	}
 
 	if ((r->action == PF_DROP) &&
 	    ((r->rule_flag & PFRULE_RETURNRST) ||
 	    (r->rule_flag & PFRULE_RETURNICMP) ||
 	    (r->rule_flag & PFRULE_RETURN))) {
 		/* undo NAT changes, if they have taken place */
 		if (nr != NULL) {
 			PF_ACPY(saddr, &sk->addr[pd->sidx], af);
 			PF_ACPY(daddr, &sk->addr[pd->didx], af);
 			if (pd->sport)
 				*pd->sport = sk->port[pd->sidx];
 			if (pd->dport)
 				*pd->dport = sk->port[pd->didx];
 			if (pd->proto_sum)
 				*pd->proto_sum = bproto_sum;
 			if (pd->ip_sum)
 				*pd->ip_sum = bip_sum;
 			m_copyback(m, off, hdrlen, pd->hdr.any);
 		}
 		if (pd->proto == IPPROTO_TCP &&
 		    ((r->rule_flag & PFRULE_RETURNRST) ||
 		    (r->rule_flag & PFRULE_RETURN)) &&
 		    !(th->th_flags & TH_RST)) {
 			u_int32_t	 ack = ntohl(th->th_seq) + pd->p_len;
 			int		 len = 0;
 #ifdef INET
 			struct ip	*h4;
 #endif
 #ifdef INET6
 			struct ip6_hdr	*h6;
 #endif
 
 			switch (af) {
 #ifdef INET
 			case AF_INET:
 				h4 = mtod(m, struct ip *);
 				len = ntohs(h4->ip_len) - off;
 				break;
 #endif
 #ifdef INET6
 			case AF_INET6:
 				h6 = mtod(m, struct ip6_hdr *);
 				len = ntohs(h6->ip6_plen) - (off - sizeof(*h6));
 				break;
 #endif
 			}
 
 			if (pf_check_proto_cksum(m, off, len, IPPROTO_TCP, af))
 				REASON_SET(&reason, PFRES_PROTCKSUM);
 			else {
 				if (th->th_flags & TH_SYN)
 					ack++;
 				if (th->th_flags & TH_FIN)
 					ack++;
 				pf_send_tcp(m, r, af, pd->dst,
 				    pd->src, th->th_dport, th->th_sport,
 				    ntohl(th->th_ack), ack, TH_RST|TH_ACK, 0, 0,
 				    r->return_ttl, 1, 0, kif->pfik_ifp);
 			}
 		} else if (pd->proto != IPPROTO_ICMP && af == AF_INET &&
 		    r->return_icmp)
 			pf_send_icmp(m, r->return_icmp >> 8,
 			    r->return_icmp & 255, af, r);
 		else if (pd->proto != IPPROTO_ICMPV6 && af == AF_INET6 &&
 		    r->return_icmp6)
 			pf_send_icmp(m, r->return_icmp6 >> 8,
 			    r->return_icmp6 & 255, af, r);
 	}
 
 	if (r->action == PF_DROP)
 		goto cleanup;
 
 	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
 		REASON_SET(&reason, PFRES_MEMORY);
 		goto cleanup;
 	}
 	if (rtableid >= 0)
 		M_SETFIB(m, rtableid);
 
 	if (!state_icmp && (r->keep_state || nr != NULL ||
 	    (pd->flags & PFDESC_TCP_NORM))) {
 		int action;
 		action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
 		    sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
 		    hdrlen);
 		if (action != PF_PASS)
 			return (action);
 	} else {
 		if (sk != NULL)
 			uma_zfree(V_pf_state_key_z, sk);
 		if (nk != NULL)
 			uma_zfree(V_pf_state_key_z, nk);
 	}
 
 	/* copy back packet headers if we performed NAT operations */
 	if (rewrite)
 		m_copyback(m, off, hdrlen, pd->hdr.any);
 
 	if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) &&
 	    direction == PF_OUT &&
 	    pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m))
 		/*
 		 * We want the state created, but we dont
 		 * want to send this in case a partner
 		 * firewall has to know about it to allow
 		 * replies through it.
 		 */
 		return (PF_DEFER);
 
 	return (PF_PASS);
 
 cleanup:
 	if (sk != NULL)
 		uma_zfree(V_pf_state_key_z, sk);
 	if (nk != NULL)
 		uma_zfree(V_pf_state_key_z, nk);
 	return (PF_DROP);
 }
 
 /*
  * Allocate, initialise and insert the state entry for a packet that matched
  * filter rule r (and optionally translation rule nr).
  *
  * r, nr, a	matching filter rule, translation rule (may be NULL) and
  *		anchor rule recorded in the new state.
  * pd, m, off	packet descriptor, mbuf and transport header offset.
  * nsn		source node found for the translation rule, if any.
  * nk, sk	state keys preallocated by the translation code; both NULL
  *		when nr is NULL, both non-NULL otherwise (asserted below).
  * sport, dport	ports used to build the key when nr is NULL.
  * rewrite	out: set to 1 when the TCP header copy was modified here.
  * kif		interface the state is bound to (via BOUND_IFACE()).
  * sm		out: the new state, written once insertion succeeded.
  * tag		tag to store in the state when > 0.
  * bproto_sum, bip_sum, hdrlen
  *		original checksums and header length, used to undo NAT
  *		before answering a SYN on behalf of a synproxy rule.
  *
  * Returns PF_PASS on success, PF_SYNPROXY_DROP when the state was created
  * and a SYN|ACK was sent for a synproxy rule, PF_DROP on failure.
  *
  * On the failure paths handled in this function the state keys passed in
  * are released here; the caller does not free them after a non-PF_PASS
  * return.
  */
 static int
 pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
     struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk,
     struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport,
     u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm,
     int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen)
 {
 	struct pf_state		*s = NULL;
 	struct pf_src_node	*sn = NULL;
 	struct tcphdr		*th = pd->hdr.tcp;
 	u_int16_t		 mss = V_tcp_mssdflt;
 	u_short			 reason;
 
 	/* check maximums */
 	if (r->max_states &&
 	    (counter_u64_fetch(r->states_cur) >= r->max_states)) {
 		counter_u64_add(V_pf_status.lcounters[LCNT_STATES], 1);
 		REASON_SET(&reason, PFRES_MAXSTATES);
 		/*
 		 * Unwind through csfailed like the other early failures so
 		 * that preallocated state keys are not leaked.
 		 */
 		goto csfailed;
 	}
 	/* src node for filter rule */
 	if ((r->rule_flag & PFRULE_SRCTRACK ||
 	    r->rpool.opts & PF_POOL_STICKYADDR) &&
 	    pf_insert_src_node(&sn, r, pd->src, pd->af) != 0) {
 		REASON_SET(&reason, PFRES_SRCLIMIT);
 		goto csfailed;
 	}
 	/* src node for translation rule */
 	if (nr != NULL && (nr->rpool.opts & PF_POOL_STICKYADDR) &&
 	    pf_insert_src_node(&nsn, nr, &sk->addr[pd->sidx], pd->af)) {
 		REASON_SET(&reason, PFRES_SRCLIMIT);
 		goto csfailed;
 	}
 	s = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO);
 	if (s == NULL) {
 		REASON_SET(&reason, PFRES_MEMORY);
 		goto csfailed;
 	}
 	s->rule.ptr = r;
 	s->nat_rule.ptr = nr;
 	s->anchor.ptr = a;
 	STATE_INC_COUNTERS(s);
 	if (r->allow_opts)
 		s->state_flags |= PFSTATE_ALLOWOPTS;
 	if (r->rule_flag & PFRULE_STATESLOPPY)
 		s->state_flags |= PFSTATE_SLOPPY;
 	s->log = r->log & PF_LOG_ALL;
 	s->sync_state = PFSYNC_S_NONE;
 	if (nr != NULL)
 		s->log |= nr->log & PF_LOG_ALL;
 	/* Protocol specific initial peer state and timeout. */
 	switch (pd->proto) {
 	case IPPROTO_TCP:
 		s->src.seqlo = ntohl(th->th_seq);
 		s->src.seqhi = s->src.seqlo + pd->p_len + 1;
 		if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
 		    r->keep_state == PF_STATE_MODULATE) {
 			/* Generate sequence number modulator */
 			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
 			    0)
 				s->src.seqdiff = 1;
 			pf_change_proto_a(m, &th->th_seq, &th->th_sum,
 			    htonl(s->src.seqlo + s->src.seqdiff), 0);
 			*rewrite = 1;
 		} else
 			s->src.seqdiff = 0;
 		if (th->th_flags & TH_SYN) {
 			s->src.seqhi++;
 			s->src.wscale = pf_get_wscale(m, off,
 			    th->th_off, pd->af);
 		}
 		s->src.max_win = MAX(ntohs(th->th_win), 1);
 		if (s->src.wscale & PF_WSCALE_MASK) {
 			/* Remove scale factor from initial window */
 			int win = s->src.max_win;
 			win += 1 << (s->src.wscale & PF_WSCALE_MASK);
 			s->src.max_win = (win - 1) >>
 			    (s->src.wscale & PF_WSCALE_MASK);
 		}
 		if (th->th_flags & TH_FIN)
 			s->src.seqhi++;
 		s->dst.seqhi = 1;
 		s->dst.max_win = 1;
 		s->src.state = TCPS_SYN_SENT;
 		s->dst.state = TCPS_CLOSED;
 		s->timeout = PFTM_TCP_FIRST_PACKET;
 		break;
 	case IPPROTO_UDP:
 		s->src.state = PFUDPS_SINGLE;
 		s->dst.state = PFUDPS_NO_TRAFFIC;
 		s->timeout = PFTM_UDP_FIRST_PACKET;
 		break;
 	case IPPROTO_ICMP:
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 #endif
 		s->timeout = PFTM_ICMP_FIRST_PACKET;
 		break;
 	default:
 		s->src.state = PFOTHERS_SINGLE;
 		s->dst.state = PFOTHERS_NO_TRAFFIC;
 		s->timeout = PFTM_OTHER_FIRST_PACKET;
 	}
 
 	if (r->rt && r->rt != PF_FASTROUTE) {
 		if (pf_map_addr(pd->af, r, pd->src, &s->rt_addr, NULL, &sn)) {
 			REASON_SET(&reason, PFRES_MAPFAILED);
 			pf_src_tree_remove_state(s);
 			STATE_DEC_COUNTERS(s);
 			uma_zfree(V_pf_state_z, s);
 			goto csfailed;
 		}
 		s->rt_kif = r->rpool.cur->kif;
 	}
 
 	s->creation = time_uptime;
 	s->expire = time_uptime;
 
 	if (sn != NULL)
 		s->src_node = sn;
 	if (nsn != NULL) {
 		/* XXX We only modify one side for now. */
 		PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
 		s->nat_src_node = nsn;
 	}
 	if (pd->proto == IPPROTO_TCP) {
 		if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
 		    off, pd, th, &s->src, &s->dst)) {
 			REASON_SET(&reason, PFRES_MEMORY);
 			pf_src_tree_remove_state(s);
 			STATE_DEC_COUNTERS(s);
 			uma_zfree(V_pf_state_z, s);
 			/* Keys from the translation code are still ours. */
 			goto freekeys;
 		}
 		if ((pd->flags & PFDESC_TCP_NORM) && s->src.scrub &&
 		    pf_normalize_tcp_stateful(m, off, pd, &reason, th, s,
 		    &s->src, &s->dst, rewrite)) {
 			/* This really shouldn't happen!!! */
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("pf_normalize_tcp_stateful failed on first pkt"));
 			pf_normalize_tcp_cleanup(s);
 			pf_src_tree_remove_state(s);
 			STATE_DEC_COUNTERS(s);
 			uma_zfree(V_pf_state_z, s);
 			/* Keys from the translation code are still ours. */
 			goto freekeys;
 		}
 	}
 	s->direction = pd->dir;
 
 	/*
 	 * sk/nk could already have been set up by pf_get_translation().
 	 */
 	if (nr == NULL) {
 		KASSERT((sk == NULL && nk == NULL), ("%s: nr %p sk %p, nk %p",
 		    __func__, nr, sk, nk));
 		sk = pf_state_key_setup(pd, pd->src, pd->dst, sport, dport);
 		/*
 		 * NOTE(review): s has been allocated and counted at this
 		 * point, yet csfailed does not release it or undo
 		 * STATE_INC_COUNTERS(); this looks like a leak of s when
 		 * the key cannot be set up -- confirm and unwind s here.
 		 */
 		if (sk == NULL)
 			goto csfailed;
 		nk = sk;
 	} else
 		KASSERT((sk != NULL && nk != NULL), ("%s: nr %p sk %p, nk %p",
 		    __func__, nr, sk, nk));
 
 	/* Swap sk/nk for PF_OUT. */
 	if (pf_state_insert(BOUND_IFACE(r, kif),
 	    (pd->dir == PF_IN) ? sk : nk,
 	    (pd->dir == PF_IN) ? nk : sk, s)) {
 		/*
 		 * The keys are not freed on this path; presumably
 		 * pf_state_insert() disposes of them itself on failure --
 		 * TODO confirm.
 		 */
 		if (pd->proto == IPPROTO_TCP)
 			pf_normalize_tcp_cleanup(s);
 		REASON_SET(&reason, PFRES_STATEINS);
 		pf_src_tree_remove_state(s);
 		STATE_DEC_COUNTERS(s);
 		uma_zfree(V_pf_state_z, s);
 		return (PF_DROP);
 	} else
 		*sm = s;
 
 	if (tag > 0)
 		s->tag = tag;
 	if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
 	    TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
 		s->src.state = PF_TCPS_PROXY_SRC;
 		/* undo NAT changes, if they have taken place */
 		if (nr != NULL) {
 			struct pf_state_key *skt = s->key[PF_SK_WIRE];
 			if (pd->dir == PF_OUT)
 				skt = s->key[PF_SK_STACK];
 			PF_ACPY(pd->src, &skt->addr[pd->sidx], pd->af);
 			PF_ACPY(pd->dst, &skt->addr[pd->didx], pd->af);
 			if (pd->sport)
 				*pd->sport = skt->port[pd->sidx];
 			if (pd->dport)
 				*pd->dport = skt->port[pd->didx];
 			if (pd->proto_sum)
 				*pd->proto_sum = bproto_sum;
 			if (pd->ip_sum)
 				*pd->ip_sum = bip_sum;
 			m_copyback(m, off, hdrlen, pd->hdr.any);
 		}
 		s->src.seqhi = htonl(arc4random());
 		/* Find mss option */
 		int rtid = M_GETFIB(m);
 		mss = pf_get_mss(m, off, th->th_off, pd->af);
 		mss = pf_calc_mss(pd->src, pd->af, rtid, mss);
 		mss = pf_calc_mss(pd->dst, pd->af, rtid, mss);
 		s->src.mss = mss;
 		/* Answer the SYN ourselves; the original packet is dropped. */
 		pf_send_tcp(NULL, r, pd->af, pd->dst, pd->src, th->th_dport,
 		    th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
 		    TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, NULL);
 		REASON_SET(&reason, PFRES_SYNPROXY);
 		return (PF_SYNPROXY_DROP);
 	}
 
 	return (PF_PASS);
 
 csfailed:
 	/*
 	 * Failure before the source nodes were handed to a state: drop the
 	 * references taken on them and unlink nodes that became unused.
 	 */
 	if (sn != NULL) {
 		struct pf_srchash *sh;
 
 		sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
 		PF_HASHROW_LOCK(sh);
 		if (--sn->states == 0 && sn->expire == 0) {
 			pf_unlink_src_node(sn);
 			uma_zfree(V_pf_sources_z, sn);
 			counter_u64_add(
 			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
 		}
 		PF_HASHROW_UNLOCK(sh);
 	}
 
 	if (nsn != sn && nsn != NULL) {
 		struct pf_srchash *sh;
 
 		sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)];
 		PF_HASHROW_LOCK(sh);
 		if (--nsn->states == 0 && nsn->expire == 0) {
 			pf_unlink_src_node(nsn);
 			uma_zfree(V_pf_sources_z, nsn);
 			counter_u64_add(
 			    V_pf_status.scounters[SCNT_SRC_NODE_REMOVALS], 1);
 		}
 		PF_HASHROW_UNLOCK(sh);
 	}
 	/* FALLTHROUGH */
 
 freekeys:
 	/* Release state keys that were never attached to a state. */
 	if (sk != NULL)
 		uma_zfree(V_pf_state_key_z, sk);
 	if (nk != NULL)
 		uma_zfree(V_pf_state_key_z, nk);
 
 	return (PF_DROP);
 }
 
 /*
  * Filter-rule evaluation variant that matches only on criteria which do
  * not need a transport header: interface, direction, address family,
  * protocol, addresses, TOS, probability and tag.  Rules carrying an OS
  * fingerprint, TCP/UDP port operators, TCP flags or ICMP type/code are
  * passed over.
  *
  * rm, am, rsm	in/out: deciding rule, its anchor rule and ruleset; only
  *		written when a rule matches, and *rm is dereferenced after
  *		the walk, so it must hold a valid rule on entry.
  * h		unused in this function.
  *
  * Returns PF_PASS when the deciding rule's action is PF_PASS and the packet
  * could be tagged (if a tag applies), PF_DROP otherwise.
  */
 static int
 pf_test_fragment(struct pf_rule **rm, int direction, struct pfi_kif *kif,
     struct mbuf *m, void *h, struct pf_pdesc *pd, struct pf_rule **am,
     struct pf_ruleset **rsm)
 {
 	struct pf_anchor_stackframe	frames[PF_ANCHOR_STACKSIZE];
 	struct pf_ruleset	*cur_rs = NULL;
 	struct pf_rule		*cur, *cur_anchor = NULL;
 	struct pf_rule		*hit;
 	sa_family_t		 af = pd->af;
 	u_short			 reason;
 	int			 depth = 0, matched = 0, tag = -1;
 
 	PF_RULES_RASSERT();
 
 	cur = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_FILTER].active.ptr);
 	while (cur != NULL) {
 		cur->evaluations++;
 
 		if (pfi_kif_match(cur->kif, kif) == cur->ifnot) {
 			/* Mismatches with a skip step jump ahead. */
 			cur = cur->skip[PF_SKIP_IFP].ptr;
 		} else if (cur->direction && cur->direction != direction) {
 			cur = cur->skip[PF_SKIP_DIR].ptr;
 		} else if (cur->af && cur->af != af) {
 			cur = cur->skip[PF_SKIP_AF].ptr;
 		} else if (cur->proto && cur->proto != pd->proto) {
 			cur = cur->skip[PF_SKIP_PROTO].ptr;
 		} else if (PF_MISMATCHAW(&cur->src.addr, pd->src, af,
 		    cur->src.neg, kif, M_GETFIB(m))) {
 			cur = cur->skip[PF_SKIP_SRC_ADDR].ptr;
 		} else if (PF_MISMATCHAW(&cur->dst.addr, pd->dst, af,
 		    cur->dst.neg, NULL, M_GETFIB(m))) {
 			cur = cur->skip[PF_SKIP_DST_ADDR].ptr;
 		} else if (
 		    /* TOS mismatch */
 		    (cur->tos && cur->tos != pd->tos) ||
 		    /* criteria that would need the transport header */
 		    cur->os_fingerprint != PF_OSFP_ANY ||
 		    (pd->proto == IPPROTO_UDP &&
 		    (cur->src.port_op || cur->dst.port_op)) ||
 		    (pd->proto == IPPROTO_TCP &&
 		    (cur->src.port_op || cur->dst.port_op || cur->flagset)) ||
 		    ((pd->proto == IPPROTO_ICMP ||
 		    pd->proto == IPPROTO_ICMPV6) &&
 		    (cur->type || cur->code)) ||
 		    /* probabilistic rule that did not fire this time */
 		    (cur->prob && cur->prob <=
 		    (arc4random() % (UINT_MAX - 1) + 1)) ||
 		    /* tag mismatch */
 		    (cur->match_tag && !pf_match_tag(m, cur, &tag,
 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))) {
 			/* Everything else just moves on to the next rule. */
 			cur = TAILQ_NEXT(cur, entries);
 		} else if (cur->anchor != NULL) {
 			pf_step_into_anchor(frames, &depth, &cur_rs,
 			    PF_RULESET_FILTER, &cur, &cur_anchor, &matched);
 		} else {
 			/* Plain rule matched: record it as the current verdict. */
 			matched = 1;
 			*rm = cur;
 			*am = cur_anchor;
 			*rsm = cur_rs;
 			if (cur->quick)
 				break;
 			cur = TAILQ_NEXT(cur, entries);
 		}
 
 		/* End of a ruleset: return to the enclosing anchor, if any. */
 		if (cur == NULL && pf_step_out_of_anchor(frames, &depth,
 		    &cur_rs, PF_RULESET_FILTER, &cur, &cur_anchor, &matched))
 			break;
 	}
 
 	hit = *rm;
 
 	REASON_SET(&reason, PFRES_MATCH);
 
 	if (hit->log) {
 		PFLOG_PACKET(kif, m, af, direction, reason, hit, *am, *rsm,
 		    pd, 1);
 	}
 
 	if (hit->action != PF_PASS)
 		return (PF_DROP);
 
 	if (tag > 0 && pf_tag_packet(m, pd, tag)) {
 		REASON_SET(&reason, PFRES_MEMORY);
 		return (PF_DROP);
 	}
 
 	return (PF_PASS);
 }
 
 /*
  * Validate a TCP segment against the sequence/window bookkeeping of an
  * existing state and update that bookkeeping.
  *
  * src, dst	peer records for the sender of this segment and for the
  *		other end; both are updated in place.
  * state	the state being tracked; its expire time and timeout are
  *		updated on the strict-match path.
  * kif		interface, used as the output interface when an RST is sent.
  * m, off	packet and offset of the TCP header.
  * pd		packet descriptor (pd->hdr.tcp is the header copy that gets
  *		modified).
  * reason	out: PFRES_MEMORY, PFRES_SRCLIMIT or PFRES_BADSTATE on the
  *		drop paths in this function; also passed on to
  *		pf_normalize_tcp_stateful().
  * copyback	out: set to 1 when the header copy was modified here
  *		(sequence modulation or SACK rewriting) and must be written
  *		back to the mbuf by the caller.
  *
  * Returns PF_PASS when the segment is accepted, PF_DROP otherwise.
  */
 static int
 pf_tcp_track_full(struct pf_state_peer *src, struct pf_state_peer *dst,
 	struct pf_state **state, struct pfi_kif *kif, struct mbuf *m, int off,
 	struct pf_pdesc *pd, u_short *reason, int *copyback)
 {
 	struct tcphdr		*th = pd->hdr.tcp;
 	u_int16_t		 win = ntohs(th->th_win);
 	u_int32_t		 ack, end, seq, orig_seq;
 	u_int8_t		 sws, dws;
 	int			 ackskew;
 
 	/*
 	 * Window scale shifts are applied only when both peers have a wscale
 	 * value recorded and the segment is not a SYN; otherwise windows are
 	 * taken unscaled.
 	 */
 	if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
 		sws = src->wscale & PF_WSCALE_MASK;
 		dws = dst->wscale & PF_WSCALE_MASK;
 	} else
 		sws = dws = 0;
 
 	/*
 	 * Sequence tracking algorithm from Guido van Rooij's paper:
 	 *   http://www.madison-gurkha.com/publications/tcp_filtering/
 	 *	tcp_filtering.ps
 	 */
 
 	/* orig_seq keeps the sequence number as received; seq may be eased. */
 	orig_seq = seq = ntohl(th->th_seq);
 	if (src->seqlo == 0) {
 		/* First packet from this end. Set its state */
 
 		if ((pd->flags & PFDESC_TCP_NORM || dst->scrub) &&
 		    src->scrub == NULL) {
 			if (pf_normalize_tcp_init(m, off, pd, th, src, dst)) {
 				REASON_SET(reason, PFRES_MEMORY);
 				return (PF_DROP);
 			}
 		}
 
 		/* Deferred generation of sequence number modulator */
 		if (dst->seqdiff && !src->seqdiff) {
 			/* use random iss for the TCP server */
 			while ((src->seqdiff = arc4random() - seq) == 0)
 				;
 			ack = ntohl(th->th_ack) - dst->seqdiff;
 			pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
 			    src->seqdiff), 0);
 			pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
 			*copyback = 1;
 		} else {
 			ack = ntohl(th->th_ack);
 		}
 
 		/* end = one past the last sequence number this segment uses. */
 		end = seq + pd->p_len;
 		if (th->th_flags & TH_SYN) {
 			end++;
 			if (dst->wscale & PF_WSCALE_FLAG) {
 				src->wscale = pf_get_wscale(m, off, th->th_off,
 				    pd->af);
 				if (src->wscale & PF_WSCALE_FLAG) {
 					/* Remove scale factor from initial
 					 * window */
 					sws = src->wscale & PF_WSCALE_MASK;
 					win = ((u_int32_t)win + (1 << sws) - 1)
 					    >> sws;
 					dws = dst->wscale & PF_WSCALE_MASK;
 				} else {
 					/* fixup other window */
 					dst->max_win <<= dst->wscale &
 					    PF_WSCALE_MASK;
 					/* in case of a retrans SYN|ACK */
 					dst->wscale = 0;
 				}
 			}
 		}
 		if (th->th_flags & TH_FIN)
 			end++;
 
 		src->seqlo = seq;
 		if (src->state < TCPS_SYN_SENT)
 			src->state = TCPS_SYN_SENT;
 
 		/*
 		 * May need to slide the window (seqhi may have been set by
 		 * the crappy stack check or if we picked up the connection
 		 * after establishment)
 		 */
 		if (src->seqhi == 1 ||
 		    SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
 			src->seqhi = end + MAX(1, dst->max_win << dws);
 		if (win > src->max_win)
 			src->max_win = win;
 
 	} else {
 		/* Later packets: undo the peer's modulation of the ack field. */
 		ack = ntohl(th->th_ack) - dst->seqdiff;
 		if (src->seqdiff) {
 			/* Modulate sequence numbers */
 			pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
 			    src->seqdiff), 0);
 			pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
 			*copyback = 1;
 		}
 		end = seq + pd->p_len;
 		if (th->th_flags & TH_SYN)
 			end++;
 		if (th->th_flags & TH_FIN)
 			end++;
 	}
 
 	if ((th->th_flags & TH_ACK) == 0) {
 		/* Let it pass through the ack skew check */
 		ack = dst->seqlo;
 	} else if ((ack == 0 &&
 	    (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
 	    /* broken tcp stacks do not set ack */
 	    (dst->state < TCPS_SYN_SENT)) {
 		/*
 		 * Many stacks (ours included) will set the ACK number in an
 		 * FIN|ACK if the SYN times out -- no sequence to ACK.
 		 */
 		ack = dst->seqlo;
 	}
 
 	if (seq == end) {
 		/* Ease sequencing restrictions on no data packets */
 		seq = src->seqlo;
 		end = seq;
 	}
 
 	/* Signed distance between what the peer has sent and what is acked. */
 	ackskew = dst->seqlo - ack;
 
 
 	/*
 	 * Need to demodulate the sequence numbers in any TCP SACK options
 	 * (Selective ACK). We could optionally validate the SACK values
 	 * against the current ACK window, either forwards or backwards, but
 	 * I'm not confident that SACK has been implemented properly
 	 * everywhere. It wouldn't surprise me if several stacks accidentally
 	 * SACK too far backwards of previously ACKed data. There really aren't
 	 * any security implications of bad SACKing unless the target stack
 	 * doesn't validate the option length correctly. Someone trying to
 	 * spoof into a TCP connection won't bother blindly sending SACK
 	 * options anyway.
 	 */
 	if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
 		if (pf_modulate_sack(m, off, pd, th, dst))
 			*copyback = 1;
 	}
 
 
 	/*
 	 * Three outcomes follow: a strict in-window match (full update of the
 	 * state), a loose match within MAXACKWINDOW for states that are not
 	 * fully open (limited update, expire time left alone), or a drop.
 	 * The trailing comment after each condition below describes the
 	 * condition on the line above it.  MAXACKWINDOW is defined here and
 	 * not #undef'd within this function.
 	 */
 #define	MAXACKWINDOW (0xffff + 1500)	/* 1500 is an arbitrary fudge factor */
 	if (SEQ_GEQ(src->seqhi, end) &&
 	    /* Last octet inside other's window space */
 	    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
 	    /* Retrans: not more than one window back */
 	    (ackskew >= -MAXACKWINDOW) &&
 	    /* Acking not more than one reassembled fragment backwards */
 	    (ackskew <= (MAXACKWINDOW << sws)) &&
 	    /* Acking not more than one window forward */
 	    ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
 	    (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
 	    (pd->flags & PFDESC_IP_REAS) == 0)) {
 	    /* Require an exact/+1 sequence match on resets when possible */
 
 		if (dst->scrub || src->scrub) {
 			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
 			    *state, src, dst, copyback))
 				return (PF_DROP);
 		}
 
 		/* update max window */
 		if (src->max_win < win)
 			src->max_win = win;
 		/* synchronize sequencing */
 		if (SEQ_GT(end, src->seqlo))
 			src->seqlo = end;
 		/* slide the window of what the other end can send */
 		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
 			dst->seqhi = ack + MAX((win << sws), 1);
 
 
 		/* update states */
 		if (th->th_flags & TH_SYN)
 			if (src->state < TCPS_SYN_SENT)
 				src->state = TCPS_SYN_SENT;
 		if (th->th_flags & TH_FIN)
 			if (src->state < TCPS_CLOSING)
 				src->state = TCPS_CLOSING;
 		if (th->th_flags & TH_ACK) {
 			if (dst->state == TCPS_SYN_SENT) {
 				dst->state = TCPS_ESTABLISHED;
 				/*
 				 * Both ends established: apply the source
 				 * node connection limits.
 				 */
 				if (src->state == TCPS_ESTABLISHED &&
 				    (*state)->src_node != NULL &&
 				    pf_src_connlimit(state)) {
 					REASON_SET(reason, PFRES_SRCLIMIT);
 					return (PF_DROP);
 				}
 			} else if (dst->state == TCPS_CLOSING)
 				dst->state = TCPS_FIN_WAIT_2;
 		}
 		if (th->th_flags & TH_RST)
 			src->state = dst->state = TCPS_TIME_WAIT;
 
 		/* update expire time */
 		(*state)->expire = time_uptime;
 		/* Pick the timeout class from the combined peer states. */
 		if (src->state >= TCPS_FIN_WAIT_2 &&
 		    dst->state >= TCPS_FIN_WAIT_2)
 			(*state)->timeout = PFTM_TCP_CLOSED;
 		else if (src->state >= TCPS_CLOSING &&
 		    dst->state >= TCPS_CLOSING)
 			(*state)->timeout = PFTM_TCP_FIN_WAIT;
 		else if (src->state < TCPS_ESTABLISHED ||
 		    dst->state < TCPS_ESTABLISHED)
 			(*state)->timeout = PFTM_TCP_OPENING;
 		else if (src->state >= TCPS_CLOSING ||
 		    dst->state >= TCPS_CLOSING)
 			(*state)->timeout = PFTM_TCP_CLOSING;
 		else
 			(*state)->timeout = PFTM_TCP_ESTABLISHED;
 
 		/* Fall through to PASS packet */
 
 	} else if ((dst->state < TCPS_SYN_SENT ||
 		dst->state >= TCPS_FIN_WAIT_2 ||
 		src->state >= TCPS_FIN_WAIT_2) &&
 	    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) &&
 	    /* Within a window forward of the originating packet */
 	    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
 	    /* Within a window backward of the originating packet */
 
 		/*
 		 * This currently handles three situations:
 		 *  1) Stupid stacks will shotgun SYNs before their peer
 		 *     replies.
 		 *  2) When PF catches an already established stream (the
 		 *     firewall rebooted, the state table was flushed, routes
 		 *     changed...)
 		 *  3) Packets get funky immediately after the connection
 		 *     closes (this should catch Solaris spurious ACK|FINs
 		 *     that web servers like to spew after a close)
 		 *
 		 * This must be a little more careful than the above code
 		 * since packet floods will also be caught here. We don't
 		 * update the TTL here to mitigate the damage of a packet
 		 * flood and so the same code can handle awkward establishment
 		 * and a loosened connection close.
 		 * In the establishment case, a correct peer response will
 		 * validate the connection, go through the normal state code
 		 * and keep updating the state TTL.
 		 */
 
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: loose state match: ");
 			pf_print_state(*state);
 			pf_print_flags(th->th_flags);
 			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
 			    "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
 			    pd->p_len, ackskew, (unsigned long long)(*state)->packets[0],
 			    (unsigned long long)(*state)->packets[1],
 			    pd->dir == PF_IN ? "in" : "out",
 			    pd->dir == (*state)->direction ? "fwd" : "rev");
 		}
 
 		if (dst->scrub || src->scrub) {
 			if (pf_normalize_tcp_stateful(m, off, pd, reason, th,
 			    *state, src, dst, copyback))
 				return (PF_DROP);
 		}
 
 		/* update max window */
 		if (src->max_win < win)
 			src->max_win = win;
 		/* synchronize sequencing */
 		if (SEQ_GT(end, src->seqlo))
 			src->seqlo = end;
 		/* slide the window of what the other end can send */
 		if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
 			dst->seqhi = ack + MAX((win << sws), 1);
 
 		/*
 		 * Cannot set dst->seqhi here since this could be a shotgunned
 		 * SYN and not an already established connection.
 		 */
 		/*
 		 * NOTE(review): the statement just above does assign
 		 * dst->seqhi, which reads as contradicting this comment --
 		 * confirm which of the two reflects the intent.
 		 */
 
 		if (th->th_flags & TH_FIN)
 			if (src->state < TCPS_CLOSING)
 				src->state = TCPS_CLOSING;
 		if (th->th_flags & TH_RST)
 			src->state = dst->state = TCPS_TIME_WAIT;
 
 		/* Fall through to PASS packet */
 
 	} else {
 		/* Neither strict nor loose match: the segment is dropped. */
 		if ((*state)->dst.state == TCPS_SYN_SENT &&
 		    (*state)->src.state == TCPS_SYN_SENT) {
 			/* Send RST for state mismatches during handshake */
 			if (!(th->th_flags & TH_RST))
 				pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
 				    pd->dst, pd->src, th->th_dport,
 				    th->th_sport, ntohl(th->th_ack), 0,
 				    TH_RST, 0, 0,
 				    (*state)->rule.ptr->return_ttl, 1, 0,
 				    kif->pfik_ifp);
 			/* Reset this peer so its next packet counts as the first. */
 			src->seqlo = 0;
 			src->seqhi = 1;
 			src->max_win = 1;
 		} else if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: BAD state: ");
 			pf_print_state(*state);
 			pf_print_flags(th->th_flags);
 			printf(" seq=%u (%u) ack=%u len=%u ackskew=%d "
 			    "pkts=%llu:%llu dir=%s,%s\n",
 			    seq, orig_seq, ack, pd->p_len, ackskew,
 			    (unsigned long long)(*state)->packets[0],
 			    (unsigned long long)(*state)->packets[1],
 			    pd->dir == PF_IN ? "in" : "out",
 			    pd->dir == (*state)->direction ? "fwd" : "rev");
 			/*
 			 * One digit per failed window test: 1-4 are the
 			 * strict-match tests, 5-6 the loose-match tests.
 			 */
 			printf("pf: State failure on: %c %c %c %c | %c %c\n",
 			    SEQ_GEQ(src->seqhi, end) ? ' ' : '1',
 			    SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
 			    ' ': '2',
 			    (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
 			    (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
 			    SEQ_GEQ(src->seqhi + MAXACKWINDOW, end) ?' ' :'5',
 			    SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
 		}
 		REASON_SET(reason, PFRES_BADSTATE);
 		return (PF_DROP);
 	}
 
 	return (PF_PASS);
 }
 
 /*
  * Sloppy TCP tracking: advance the two peer states using only the TCP
  * flags of the packet (no sequence number or window validation is done in
  * this function), then refresh the state's expiry time and timeout class.
  *
  * src/dst are the peers as seen from the sender of this packet.
  *
  * Returns PF_DROP with *reason = PFRES_SRCLIMIT when the connection has
  * just become established, the state has a source node and
  * pf_src_connlimit() returns non-zero; PF_PASS otherwise.
  */
 static int
 pf_tcp_track_sloppy(struct pf_state_peer *src, struct pf_state_peer *dst,
 	struct pf_state **state, struct pf_pdesc *pd, u_short *reason)
 {
 	struct tcphdr		*tcp = pd->hdr.tcp;
 	int			 established = 0;
 	int			 tmo;
 
 	/* SYN and FIN only ever move the sender's state forward */
 	if ((tcp->th_flags & TH_SYN) && src->state < TCPS_SYN_SENT)
 		src->state = TCPS_SYN_SENT;
 	if ((tcp->th_flags & TH_FIN) && src->state < TCPS_CLOSING)
 		src->state = TCPS_CLOSING;
 
 	if (tcp->th_flags & TH_ACK) {
 		if (dst->state == TCPS_SYN_SENT) {
 			/* the other side's SYN is being acknowledged */
 			dst->state = TCPS_ESTABLISHED;
 			if (src->state == TCPS_ESTABLISHED)
 				established = 1;
 		} else if (dst->state == TCPS_CLOSING) {
 			/* the other side's FIN is being acknowledged */
 			dst->state = TCPS_FIN_WAIT_2;
 		} else if (src->state == TCPS_SYN_SENT &&
 		    dst->state < TCPS_SYN_SENT) {
 			/*
 			 * Special sloppy case: only one half of the
 			 * connection is visible.  An ACK following the
 			 * initial SYN, with no packet ever seen from the
 			 * destination, marks the connection established.
 			 */
 			src->state = TCPS_ESTABLISHED;
 			dst->state = TCPS_ESTABLISHED;
 			established = 1;
 		} else if (src->state == TCPS_CLOSING &&
 		    dst->state == TCPS_ESTABLISHED && dst->seqlo == 0) {
 			/*
 			 * Closing of a half connection where the full
 			 * bidirectional FIN/ACK+ACK exchange is not seen.
 			 */
 			dst->state = TCPS_CLOSING;
 		}
 
 		/* newly established: enforce the source connection limits */
 		if (established && (*state)->src_node != NULL &&
 		    pf_src_connlimit(state)) {
 			REASON_SET(reason, PFRES_SRCLIMIT);
 			return (PF_DROP);
 		}
 	}
 
 	if (tcp->th_flags & TH_RST) {
 		src->state = TCPS_TIME_WAIT;
 		dst->state = TCPS_TIME_WAIT;
 	}
 
 	/* pick the timeout class from the combined peer states */
 	if (src->state >= TCPS_FIN_WAIT_2 && dst->state >= TCPS_FIN_WAIT_2)
 		tmo = PFTM_TCP_CLOSED;
 	else if (src->state >= TCPS_CLOSING && dst->state >= TCPS_CLOSING)
 		tmo = PFTM_TCP_FIN_WAIT;
 	else if (src->state < TCPS_ESTABLISHED ||
 	    dst->state < TCPS_ESTABLISHED)
 		tmo = PFTM_TCP_OPENING;
 	else if (src->state >= TCPS_CLOSING || dst->state >= TCPS_CLOSING)
 		tmo = PFTM_TCP_CLOSING;
 	else
 		tmo = PFTM_TCP_ESTABLISHED;
 
 	/* update expire time */
 	(*state)->expire = time_uptime;
 	(*state)->timeout = tmo;
 
 	return (PF_PASS);
 }
 
 /*
  * Match a TCP packet against the state table and advance the state.
  *
  * The lookup key is built from the packet's addresses and ports, the
  * matching state is fetched with STATE_LOOKUP, and then, in order:
  *  - the synproxy handshake phases (PF_TCPS_PROXY_SRC / PF_TCPS_PROXY_DST)
  *    are driven, answering with pf_send_tcp() and consuming the packet;
  *  - a bare SYN on a state whose two peers are both at or past
  *    TCPS_FIN_WAIT_2 unlinks that state and drops the packet;
  *  - the packet goes through sloppy or full tracking depending on
  *    PFSTATE_SLOPPY;
  *  - if the wire and stack keys differ, addresses/ports are rewritten and
  *    the TCP header is copied back into the mbuf.
  *
  * Returns PF_PASS, PF_DROP or PF_SYNPROXY_DROP; *reason is set on the drop
  * paths visible here.  The 'h' argument is not referenced in this function.
  * NOTE(review): STATE_LOOKUP is a macro defined elsewhere; it presumably
  * returns from this function when no usable state is found -- confirm.
  */
 static int
 pf_test_state_tcp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd,
     u_short *reason)
 {
 	struct pf_state_key_cmp	 key;
 	struct tcphdr		*th = pd->hdr.tcp;
 	int			 copyback = 0;
 	struct pf_state_peer	*src, *dst;
 	struct pf_state_key	*sk;
 
 	bzero(&key, sizeof(key));
 	key.af = pd->af;
 	key.proto = IPPROTO_TCP;
 	if (direction == PF_IN)	{	/* wire side, straight */
 		PF_ACPY(&key.addr[0], pd->src, key.af);
 		PF_ACPY(&key.addr[1], pd->dst, key.af);
 		key.port[0] = th->th_sport;
 		key.port[1] = th->th_dport;
 	} else {			/* stack side, reverse */
 		PF_ACPY(&key.addr[1], pd->src, key.af);
 		PF_ACPY(&key.addr[0], pd->dst, key.af);
 		key.port[1] = th->th_sport;
 		key.port[0] = th->th_dport;
 	}
 
 	STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 	/* src is the peer that sent this packet, dst the one receiving it */
 	if (direction == (*state)->direction) {
 		src = &(*state)->src;
 		dst = &(*state)->dst;
 	} else {
 		src = &(*state)->dst;
 		dst = &(*state)->src;
 	}
 
 	sk = (*state)->key[pd->didx];
 
 	/*
 	 * Synproxy, first phase: only packets travelling in the state's own
 	 * direction are handled; anything else is consumed.
 	 */
 	if ((*state)->src.state == PF_TCPS_PROXY_SRC) {
 		if (direction != (*state)->direction) {
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		}
 		if (th->th_flags & TH_SYN) {
 			/* a SYN must carry the sequence number recorded in the state */
 			if (ntohl(th->th_seq) != (*state)->src.seqlo) {
 				REASON_SET(reason, PFRES_SYNPROXY);
 				return (PF_DROP);
 			}
 			/* answer with a SYN|ACK sent from the packet's destination */
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
 			    pd->src, th->th_dport, th->th_sport,
 			    (*state)->src.seqhi, ntohl(th->th_seq) + 1,
 			    TH_SYN|TH_ACK, 0, (*state)->src.mss, 0, 1, 0, NULL);
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		} else if (!(th->th_flags & TH_ACK) ||
 		    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
 		    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
 			/* not the ACK that completes the proxied handshake */
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_DROP);
 		} else if ((*state)->src_node != NULL &&
 		    pf_src_connlimit(state)) {
 			REASON_SET(reason, PFRES_SRCLIMIT);
 			return (PF_DROP);
 		} else
 			(*state)->src.state = PF_TCPS_PROXY_DST;
 	}
 	/*
 	 * Synproxy, second phase.  This is a separate 'if', so it is entered
 	 * in the same call when the block above has just switched the state
 	 * to PF_TCPS_PROXY_DST.
 	 */
 	if ((*state)->src.state == PF_TCPS_PROXY_DST) {
 		if (direction == (*state)->direction) {
 			/* same direction: must be a pure ACK with the expected numbers */
 			if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
 			    (ntohl(th->th_ack) != (*state)->src.seqhi + 1) ||
 			    (ntohl(th->th_seq) != (*state)->src.seqlo + 1)) {
 				REASON_SET(reason, PFRES_SYNPROXY);
 				return (PF_DROP);
 			}
 			(*state)->src.max_win = MAX(ntohs(th->th_win), 1);
 			/* seqhi == 1 is replaced by a random initial sequence number */
 			if ((*state)->dst.seqhi == 1)
 				(*state)->dst.seqhi = htonl(arc4random());
 			/* send a SYN built from the addresses/ports in the state key */
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
 			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
 			    sk->port[pd->sidx], sk->port[pd->didx],
 			    (*state)->dst.seqhi, 0, TH_SYN, 0,
 			    (*state)->src.mss, 0, 0, (*state)->tag, NULL);
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		} else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
 		    (TH_SYN|TH_ACK)) ||
 		    (ntohl(th->th_ack) != (*state)->dst.seqhi + 1)) {
 			/* reverse direction: only the matching SYN|ACK is accepted */
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_DROP);
 		} else {
 			/*
 			 * Matching SYN|ACK: record its window and sequence
 			 * number, send an ACK towards each side, compute the
 			 * sequence offsets and mark both peers established.
 			 * The SYN|ACK itself is consumed.
 			 */
 			(*state)->dst.max_win = MAX(ntohs(th->th_win), 1);
 			(*state)->dst.seqlo = ntohl(th->th_seq);
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af, pd->dst,
 			    pd->src, th->th_dport, th->th_sport,
 			    ntohl(th->th_ack), ntohl(th->th_seq) + 1,
 			    TH_ACK, (*state)->src.max_win, 0, 0, 0,
 			    (*state)->tag, NULL);
 			pf_send_tcp(NULL, (*state)->rule.ptr, pd->af,
 			    &sk->addr[pd->sidx], &sk->addr[pd->didx],
 			    sk->port[pd->sidx], sk->port[pd->didx],
 			    (*state)->src.seqhi + 1, (*state)->src.seqlo + 1,
 			    TH_ACK, (*state)->dst.max_win, 0, 0, 1, 0, NULL);
 			(*state)->src.seqdiff = (*state)->dst.seqhi -
 			    (*state)->src.seqlo;
 			(*state)->dst.seqdiff = (*state)->src.seqhi -
 			    (*state)->dst.seqlo;
 			(*state)->src.seqhi = (*state)->src.seqlo +
 			    (*state)->dst.max_win;
 			(*state)->dst.seqhi = (*state)->dst.seqlo +
 			    (*state)->src.max_win;
 			(*state)->src.wscale = (*state)->dst.wscale = 0;
 			(*state)->src.state = (*state)->dst.state =
 			    TCPS_ESTABLISHED;
 			REASON_SET(reason, PFRES_SYNPROXY);
 			return (PF_SYNPROXY_DROP);
 		}
 	}
 
 	/*
 	 * A bare SYN on a state whose peers are both at or past
 	 * TCPS_FIN_WAIT_2: unlink the old state, clear the caller's pointer
 	 * and drop this packet.
 	 */
 	if (((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN) &&
 	    dst->state >= TCPS_FIN_WAIT_2 &&
 	    src->state >= TCPS_FIN_WAIT_2) {
 		if (V_pf_status.debug >= PF_DEBUG_MISC) {
 			printf("pf: state reuse ");
 			pf_print_state(*state);
 			pf_print_flags(th->th_flags);
 			printf("\n");
 		}
 		/* XXX make sure it's the same direction ?? */
 		(*state)->src.state = (*state)->dst.state = TCPS_CLOSED;
 		pf_unlink_state(*state, PF_ENTER_LOCKED);
 		*state = NULL;
 		return (PF_DROP);
 	}
 
 	/* sloppy states skip the full tracker, which is the one given &copyback */
 	if ((*state)->state_flags & PFSTATE_SLOPPY) {
 		if (pf_tcp_track_sloppy(src, dst, state, pd, reason) == PF_DROP)
 			return (PF_DROP);
 	} else {
 		if (pf_tcp_track_full(src, dst, state, kif, m, off, pd, reason,
 		    &copyback) == PF_DROP)
 			return (PF_DROP);
 	}
 
 	/* translate source/destination address, if necessary */
 	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
 		struct pf_state_key *nk = (*state)->key[pd->didx];
 
 		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
 		    nk->port[pd->sidx] != th->th_sport)
 			pf_change_ap(m, pd->src, &th->th_sport,
 			    pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx],
 			    nk->port[pd->sidx], 0, pd->af);
 
 		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
 		    nk->port[pd->didx] != th->th_dport)
 			pf_change_ap(m, pd->dst, &th->th_dport,
 			    pd->ip_sum, &th->th_sum, &nk->addr[pd->didx],
 			    nk->port[pd->didx], 0, pd->af);
 		/* set whenever the keys differ, even if neither call above ran */
 		copyback = 1;
 	}
 
 	/* Copyback sequence modulation or stateful scrub changes if needed */
 	if (copyback)
 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
 
 	return (PF_PASS);
 }
 
 static int
 pf_test_state_udp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd)
 {
 	struct pf_state_peer	*src, *dst;
 	struct pf_state_key_cmp	 key;
 	struct udphdr		*uh = pd->hdr.udp;
 
 	bzero(&key, sizeof(key));
 	key.af = pd->af;
 	key.proto = IPPROTO_UDP;
 	if (direction == PF_IN)	{	/* wire side, straight */
 		PF_ACPY(&key.addr[0], pd->src, key.af);
 		PF_ACPY(&key.addr[1], pd->dst, key.af);
 		key.port[0] = uh->uh_sport;
 		key.port[1] = uh->uh_dport;
 	} else {			/* stack side, reverse */
 		PF_ACPY(&key.addr[1], pd->src, key.af);
 		PF_ACPY(&key.addr[0], pd->dst, key.af);
 		key.port[1] = uh->uh_sport;
 		key.port[0] = uh->uh_dport;
 	}
 
 	STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 	if (direction == (*state)->direction) {
 		src = &(*state)->src;
 		dst = &(*state)->dst;
 	} else {
 		src = &(*state)->dst;
 		dst = &(*state)->src;
 	}
 
 	/* update states */
 	if (src->state < PFUDPS_SINGLE)
 		src->state = PFUDPS_SINGLE;
 	if (dst->state == PFUDPS_SINGLE)
 		dst->state = PFUDPS_MULTIPLE;
 
 	/* update expire time */
 	(*state)->expire = time_uptime;
 	if (src->state == PFUDPS_MULTIPLE && dst->state == PFUDPS_MULTIPLE)
 		(*state)->timeout = PFTM_UDP_MULTIPLE;
 	else
 		(*state)->timeout = PFTM_UDP_SINGLE;
 
 	/* translate source/destination address, if necessary */
 	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
 		struct pf_state_key *nk = (*state)->key[pd->didx];
 
 		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
 		    nk->port[pd->sidx] != uh->uh_sport)
 			pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum,
 			    &uh->uh_sum, &nk->addr[pd->sidx],
 			    nk->port[pd->sidx], 1, pd->af);
 
 		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
 		    nk->port[pd->didx] != uh->uh_dport)
 			pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum,
 			    &uh->uh_sum, &nk->addr[pd->didx],
 			    nk->port[pd->didx], 1, pd->af);
 		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
 	}
 
 	return (PF_PASS);
 }
 
 /*
  * Match an ICMP or ICMPv6 packet against the state table.
  *
  * Messages that are not one of the error types listed below are looked up
  * as ICMP states keyed on the packet addresses and the ICMP id.  Error
  * messages are matched through the packet quoted inside them: the embedded
  * IP/IPv6 header and the start of the header that follows it are pulled
  * out of the mbuf and the state of that original flow (TCP, UDP, ICMP,
  * ICMPv6 or other) is looked up instead.  In both cases, when the state's
  * wire and stack keys differ, the affected addresses/ports/ids are
  * rewritten and the modified headers are copied back into the mbuf.
  *
  * Returns PF_PASS or PF_DROP.  *reason is set by REASON_SET on the
  * explicit drop paths and is handed to pf_pull_hdr() for short packets.
  * The 'h' argument is not referenced in this function.
  * NOTE(review): STATE_LOOKUP is a macro defined elsewhere and may return
  * from this function on its own -- confirm against its definition.
  */
 static int
 pf_test_state_icmp(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
 {
 	struct pf_addr  *saddr = pd->src, *daddr = pd->dst;
 	u_int16_t	 icmpid = 0, *icmpsum;
 	u_int8_t	 icmptype;
 	int		 state_icmp = 0;
 	struct pf_state_key_cmp key;
 
 	bzero(&key, sizeof(key));
 	/* classify the message: state_icmp != 0 means "error message" */
 	switch (pd->proto) {
 #ifdef INET
 	case IPPROTO_ICMP:
 		icmptype = pd->hdr.icmp->icmp_type;
 		icmpid = pd->hdr.icmp->icmp_id;
 		icmpsum = &pd->hdr.icmp->icmp_cksum;
 
 		if (icmptype == ICMP_UNREACH ||
 		    icmptype == ICMP_SOURCEQUENCH ||
 		    icmptype == ICMP_REDIRECT ||
 		    icmptype == ICMP_TIMXCEED ||
 		    icmptype == ICMP_PARAMPROB)
 			state_icmp++;
 		break;
 #endif /* INET */
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 		icmptype = pd->hdr.icmp6->icmp6_type;
 		icmpid = pd->hdr.icmp6->icmp6_id;
 		icmpsum = &pd->hdr.icmp6->icmp6_cksum;
 
 		if (icmptype == ICMP6_DST_UNREACH ||
 		    icmptype == ICMP6_PACKET_TOO_BIG ||
 		    icmptype == ICMP6_TIME_EXCEEDED ||
 		    icmptype == ICMP6_PARAM_PROB)
 			state_icmp++;
 		break;
 #endif /* INET6 */
 	}
 
 	if (!state_icmp) {
 
 		/*
 		 * ICMP query/reply message not related to a TCP/UDP packet.
 		 * Search for an ICMP state.
 		 */
 		key.af = pd->af;
 		key.proto = pd->proto;
 		key.port[0] = key.port[1] = icmpid;
 		if (direction == PF_IN)	{	/* wire side, straight */
 			PF_ACPY(&key.addr[0], pd->src, key.af);
 			PF_ACPY(&key.addr[1], pd->dst, key.af);
 		} else {			/* stack side, reverse */
 			PF_ACPY(&key.addr[1], pd->src, key.af);
 			PF_ACPY(&key.addr[0], pd->dst, key.af);
 		}
 
 		STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 		(*state)->expire = time_uptime;
 		(*state)->timeout = PFTM_ICMP_ERROR_REPLY;
 
 		/* translate source/destination address, if necessary */
 		if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
 			struct pf_state_key *nk = (*state)->key[pd->didx];
 
 			switch (pd->af) {
 #ifdef INET
 			case AF_INET:
 				if (PF_ANEQ(pd->src,
 				    &nk->addr[pd->sidx], AF_INET))
 					pf_change_a(&saddr->v4.s_addr,
 					    pd->ip_sum,
 					    nk->addr[pd->sidx].v4.s_addr, 0);
 
 				if (PF_ANEQ(pd->dst, &nk->addr[pd->didx],
 				    AF_INET))
 					pf_change_a(&daddr->v4.s_addr,
 					    pd->ip_sum,
 					    nk->addr[pd->didx].v4.s_addr, 0);
 
 				/* rewrite the ICMP id and patch the checksum */
 				if (nk->port[0] !=
 				    pd->hdr.icmp->icmp_id) {
 					pd->hdr.icmp->icmp_cksum =
 					    pf_cksum_fixup(
 					    pd->hdr.icmp->icmp_cksum, icmpid,
 					    nk->port[pd->sidx], 0);
 					pd->hdr.icmp->icmp_id =
 					    nk->port[pd->sidx];
 				}
 
 				m_copyback(m, off, ICMP_MINLEN,
 				    (caddr_t )pd->hdr.icmp);
 				break;
 #endif /* INET */
 #ifdef INET6
 			case AF_INET6:
 				if (PF_ANEQ(pd->src,
 				    &nk->addr[pd->sidx], AF_INET6))
 					pf_change_a6(saddr,
 					    &pd->hdr.icmp6->icmp6_cksum,
 					    &nk->addr[pd->sidx], 0);
 
 				if (PF_ANEQ(pd->dst,
 				    &nk->addr[pd->didx], AF_INET6))
 					pf_change_a6(daddr,
 					    &pd->hdr.icmp6->icmp6_cksum,
 					    &nk->addr[pd->didx], 0);
 
 				m_copyback(m, off, sizeof(struct icmp6_hdr),
 				    (caddr_t )pd->hdr.icmp6);
 				break;
 #endif /* INET6 */
 			}
 		}
 		return (PF_PASS);
 
 	} else {
 		/*
 		 * ICMP error message in response to a TCP/UDP packet.
 		 * Extract the inner TCP/UDP header and search for that state.
 		 */
 
 		struct pf_pdesc	pd2;
 		bzero(&pd2, sizeof pd2);
 #ifdef INET
 		struct ip	h2;
 #endif /* INET */
 #ifdef INET6
 		struct ip6_hdr	h2_6;
 		int		terminal = 0;
 #endif /* INET6 */
 		int		ipoff2 = 0;
 		int		off2 = 0;
 
 		pd2.af = pd->af;
 		/* Payload packet is from the opposite direction. */
 		pd2.sidx = (direction == PF_IN) ? 1 : 0;
 		pd2.didx = (direction == PF_IN) ? 0 : 1;
 		switch (pd->af) {
 #ifdef INET
 		case AF_INET:
 			/* offset of h2 in mbuf chain */
 			ipoff2 = off + ICMP_MINLEN;
 
 			if (!pf_pull_hdr(m, ipoff2, &h2, sizeof(h2),
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(ip)\n"));
 				return (PF_DROP);
 			}
 			/*
 			 * ICMP error messages don't refer to non-first
 			 * fragments
 			 */
 			if (h2.ip_off & htons(IP_OFFMASK)) {
 				REASON_SET(reason, PFRES_FRAG);
 				return (PF_DROP);
 			}
 
 			/* offset of protocol header that follows h2 */
 			off2 = ipoff2 + (h2.ip_hl << 2);
 
 			pd2.proto = h2.ip_p;
 			pd2.src = (struct pf_addr *)&h2.ip_src;
 			pd2.dst = (struct pf_addr *)&h2.ip_dst;
 			pd2.ip_sum = &h2.ip_sum;
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			ipoff2 = off + sizeof(struct icmp6_hdr);
 
 			if (!pf_pull_hdr(m, ipoff2, &h2_6, sizeof(h2_6),
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(ip6)\n"));
 				return (PF_DROP);
 			}
 			pd2.proto = h2_6.ip6_nxt;
 			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
 			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
 			pd2.ip_sum = NULL;
 			off2 = ipoff2 + sizeof(h2_6);
 			/*
 			 * Walk the quoted packet's extension headers until a
 			 * protocol that is not one of them is reached.
 			 */
 			do {
 				switch (pd2.proto) {
 				case IPPROTO_FRAGMENT:
 					/*
 					 * ICMPv6 error messages for
 					 * non-first fragments
 					 */
 					REASON_SET(reason, PFRES_FRAG);
 					return (PF_DROP);
 				case IPPROTO_AH:
 				case IPPROTO_HOPOPTS:
 				case IPPROTO_ROUTING:
 				case IPPROTO_DSTOPTS: {
 					/* get next header and header length */
 					struct ip6_ext opt6;
 
 					if (!pf_pull_hdr(m, off2, &opt6,
 					    sizeof(opt6), NULL, reason,
 					    pd2.af)) {
 						DPFPRINTF(PF_DEBUG_MISC,
 						    ("pf: ICMPv6 short opt\n"));
 						return (PF_DROP);
 					}
 					if (pd2.proto == IPPROTO_AH)
 						off2 += (opt6.ip6e_len + 2) * 4;
 					else
 						off2 += (opt6.ip6e_len + 1) * 8;
 					pd2.proto = opt6.ip6e_nxt;
 					/* goto the next header */
 					break;
 				}
 				default:
 					terminal++;
 					break;
 				}
 			} while (!terminal);
 			break;
 #endif /* INET6 */
 		}
 
 		/* look up the state of the quoted packet, by its protocol */
 		switch (pd2.proto) {
 		case IPPROTO_TCP: {
 			struct tcphdr		 th;
 			u_int32_t		 seq;
 			struct pf_state_peer	*src, *dst;
 			u_int8_t		 dws;
 			int			 copyback = 0;
 
 			/*
 			 * Only the first 8 bytes of the TCP header can be
 			 * expected. Don't access any TCP header fields after
 			 * th_seq, an ackskew test is not possible.
 			 */
 			if (!pf_pull_hdr(m, off2, &th, 8, NULL, reason,
 			    pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(tcp)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_TCP;
 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
 			key.port[pd2.sidx] = th.th_sport;
 			key.port[pd2.didx] = th.th_dport;
 
 			STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 			/*
 			 * The quoted packet travelled opposite to this ICMP
 			 * message, hence the peers are swapped relative to
 			 * the regular TCP path.
 			 */
 			if (direction == (*state)->direction) {
 				src = &(*state)->dst;
 				dst = &(*state)->src;
 			} else {
 				src = &(*state)->src;
 				dst = &(*state)->dst;
 			}
 
 			if (src->wscale && dst->wscale)
 				dws = dst->wscale & PF_WSCALE_MASK;
 			else
 				dws = 0;
 
 			/* Demodulate sequence number */
 			seq = ntohl(th.th_seq) - src->seqdiff;
 			if (src->seqdiff) {
 				pf_change_a(&th.th_seq, icmpsum,
 				    htonl(seq), 0);
 				copyback = 1;
 			}
 
 			/* unless sloppy, the quoted seq must fall in the window */
 			if (!((*state)->state_flags & PFSTATE_SLOPPY) &&
 			    (!SEQ_GEQ(src->seqhi, seq) ||
 			    !SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)))) {
 				if (V_pf_status.debug >= PF_DEBUG_MISC) {
 					printf("pf: BAD ICMP %d:%d ",
 					    icmptype, pd->hdr.icmp->icmp_code);
 					pf_print_host(pd->src, 0, pd->af);
 					printf(" -> ");
 					pf_print_host(pd->dst, 0, pd->af);
 					printf(" state: ");
 					pf_print_state(*state);
 					printf(" seq=%u\n", seq);
 				}
 				REASON_SET(reason, PFRES_BADSTATE);
 				return (PF_DROP);
 			} else {
 				if (V_pf_status.debug >= PF_DEBUG_MISC) {
 					printf("pf: OK ICMP %d:%d ",
 					    icmptype, pd->hdr.icmp->icmp_code);
 					pf_print_host(pd->src, 0, pd->af);
 					printf(" -> ");
 					pf_print_host(pd->dst, 0, pd->af);
 					printf(" state: ");
 					pf_print_state(*state);
 					printf(" seq=%u\n", seq);
 				}
 			}
 
 			/* translate source/destination address, if necessary */
 			if ((*state)->key[PF_SK_WIRE] !=
 			    (*state)->key[PF_SK_STACK]) {
 				struct pf_state_key *nk =
 				    (*state)->key[pd->didx];
 
 				if (PF_ANEQ(pd2.src,
 				    &nk->addr[pd2.sidx], pd2.af) ||
 				    nk->port[pd2.sidx] != th.th_sport)
 					pf_change_icmp(pd2.src, &th.th_sport,
 					    daddr, &nk->addr[pd2.sidx],
 					    nk->port[pd2.sidx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 
 				if (PF_ANEQ(pd2.dst,
 				    &nk->addr[pd2.didx], pd2.af) ||
 				    nk->port[pd2.didx] != th.th_dport)
 					pf_change_icmp(pd2.dst, &th.th_dport,
 					    NULL, /* XXX Inbound NAT? */
 					    &nk->addr[pd2.didx],
 					    nk->port[pd2.didx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 				copyback = 1;
 			}
 
 			/* write back outer ICMP, quoted IP and quoted TCP headers */
 			if (copyback) {
 				switch (pd2.af) {
 #ifdef INET
 				case AF_INET:
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t )pd->hdr.icmp);
 					m_copyback(m, ipoff2, sizeof(h2),
 					    (caddr_t )&h2);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t )pd->hdr.icmp6);
 					m_copyback(m, ipoff2, sizeof(h2_6),
 					    (caddr_t )&h2_6);
 					break;
 #endif /* INET6 */
 				}
 				m_copyback(m, off2, 8, (caddr_t)&th);
 			}
 
 			return (PF_PASS);
 			break;
 		}
 		case IPPROTO_UDP: {
 			struct udphdr		uh;
 
 			if (!pf_pull_hdr(m, off2, &uh, sizeof(uh),
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(udp)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_UDP;
 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
 			key.port[pd2.sidx] = uh.uh_sport;
 			key.port[pd2.didx] = uh.uh_dport;
 
 			STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 			/* translate source/destination address, if necessary */
 			if ((*state)->key[PF_SK_WIRE] !=
 			    (*state)->key[PF_SK_STACK]) {
 				struct pf_state_key *nk =
 				    (*state)->key[pd->didx];
 
 				if (PF_ANEQ(pd2.src,
 				    &nk->addr[pd2.sidx], pd2.af) ||
 				    nk->port[pd2.sidx] != uh.uh_sport)
 					pf_change_icmp(pd2.src, &uh.uh_sport,
 					    daddr, &nk->addr[pd2.sidx],
 					    nk->port[pd2.sidx], &uh.uh_sum,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 1, pd2.af);
 
 				if (PF_ANEQ(pd2.dst,
 				    &nk->addr[pd2.didx], pd2.af) ||
 				    nk->port[pd2.didx] != uh.uh_dport)
 					pf_change_icmp(pd2.dst, &uh.uh_dport,
 					    NULL, /* XXX Inbound NAT? */
 					    &nk->addr[pd2.didx],
 					    nk->port[pd2.didx], &uh.uh_sum,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 1, pd2.af);
 
 				switch (pd2.af) {
 #ifdef INET
 				case AF_INET:
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t )pd->hdr.icmp);
 					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t )pd->hdr.icmp6);
 					m_copyback(m, ipoff2, sizeof(h2_6),
 					    (caddr_t )&h2_6);
 					break;
 #endif /* INET6 */
 				}
 				m_copyback(m, off2, sizeof(uh), (caddr_t)&uh);
 			}
 			return (PF_PASS);
 			break;
 		}
 #ifdef INET
 		case IPPROTO_ICMP: {
 			struct icmp		iih;
 
 			if (!pf_pull_hdr(m, off2, &iih, ICMP_MINLEN,
 			    NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short i"
 				    "(icmp)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_ICMP;
 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
 			key.port[0] = key.port[1] = iih.icmp_id;
 
 			STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 			/* translate source/destination address, if necessary */
 			if ((*state)->key[PF_SK_WIRE] !=
 			    (*state)->key[PF_SK_STACK]) {
 				struct pf_state_key *nk =
 				    (*state)->key[pd->didx];
 
 				if (PF_ANEQ(pd2.src,
 				    &nk->addr[pd2.sidx], pd2.af) ||
 				    nk->port[pd2.sidx] != iih.icmp_id)
 					pf_change_icmp(pd2.src, &iih.icmp_id,
 					    daddr, &nk->addr[pd2.sidx],
 					    nk->port[pd2.sidx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET);
 
 				if (PF_ANEQ(pd2.dst,
 				    &nk->addr[pd2.didx], pd2.af) ||
 				    nk->port[pd2.didx] != iih.icmp_id)
 					pf_change_icmp(pd2.dst, &iih.icmp_id,
 					    NULL, /* XXX Inbound NAT? */
 					    &nk->addr[pd2.didx],
 					    nk->port[pd2.didx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET);
 
 				m_copyback(m, off, ICMP_MINLEN, (caddr_t)pd->hdr.icmp);
 				m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
 				m_copyback(m, off2, ICMP_MINLEN, (caddr_t)&iih);
 			}
 			return (PF_PASS);
 			break;
 		}
 #endif /* INET */
 #ifdef INET6
 		case IPPROTO_ICMPV6: {
 			struct icmp6_hdr	iih;
 
 			if (!pf_pull_hdr(m, off2, &iih,
 			    sizeof(struct icmp6_hdr), NULL, reason, pd2.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: ICMP error message too short "
 				    "(icmp6)\n"));
 				return (PF_DROP);
 			}
 
 			key.af = pd2.af;
 			key.proto = IPPROTO_ICMPV6;
 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
 			key.port[0] = key.port[1] = iih.icmp6_id;
 
 			STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 			/* translate source/destination address, if necessary */
 			if ((*state)->key[PF_SK_WIRE] !=
 			    (*state)->key[PF_SK_STACK]) {
 				struct pf_state_key *nk =
 				    (*state)->key[pd->didx];
 
 				if (PF_ANEQ(pd2.src,
 				    &nk->addr[pd2.sidx], pd2.af) ||
 				    nk->port[pd2.sidx] != iih.icmp6_id)
 					pf_change_icmp(pd2.src, &iih.icmp6_id,
 					    daddr, &nk->addr[pd2.sidx],
 					    nk->port[pd2.sidx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET6);
 
 				if (PF_ANEQ(pd2.dst,
 				    &nk->addr[pd2.didx], pd2.af) ||
 				    nk->port[pd2.didx] != iih.icmp6_id)
 					pf_change_icmp(pd2.dst, &iih.icmp6_id,
 					    NULL, /* XXX Inbound NAT? */
 					    &nk->addr[pd2.didx],
 					    nk->port[pd2.didx], NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, AF_INET6);
 
 				m_copyback(m, off, sizeof(struct icmp6_hdr),
 				    (caddr_t)pd->hdr.icmp6);
 				m_copyback(m, ipoff2, sizeof(h2_6), (caddr_t)&h2_6);
 				m_copyback(m, off2, sizeof(struct icmp6_hdr),
 				    (caddr_t)&iih);
 			}
 			return (PF_PASS);
 			break;
 		}
 #endif /* INET6 */
 		default: {
 			/* any other quoted protocol: match on addresses only */
 			key.af = pd2.af;
 			key.proto = pd2.proto;
 			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
 			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
 			key.port[0] = key.port[1] = 0;
 
 			STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 			/* translate source/destination address, if necessary */
 			if ((*state)->key[PF_SK_WIRE] !=
 			    (*state)->key[PF_SK_STACK]) {
 				struct pf_state_key *nk =
 				    (*state)->key[pd->didx];
 
 				if (PF_ANEQ(pd2.src,
 				    &nk->addr[pd2.sidx], pd2.af))
 					pf_change_icmp(pd2.src, NULL, daddr,
 					    &nk->addr[pd2.sidx], 0, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 
 				/*
 				 * Rewrite the quoted destination (pd2.dst),
 				 * the address the test above compared, as the
 				 * TCP/UDP/ICMP branches do.
 				 */
 				if (PF_ANEQ(pd2.dst,
 				    &nk->addr[pd2.didx], pd2.af))
 					pf_change_icmp(pd2.dst, NULL,
 					    NULL, /* XXX Inbound NAT? */
 					    &nk->addr[pd2.didx], 0, NULL,
 					    pd2.ip_sum, icmpsum,
 					    pd->ip_sum, 0, pd2.af);
 
 				switch (pd2.af) {
 #ifdef INET
 				case AF_INET:
 					m_copyback(m, off, ICMP_MINLEN,
 					    (caddr_t)pd->hdr.icmp);
 					m_copyback(m, ipoff2, sizeof(h2), (caddr_t)&h2);
 					break;
 #endif /* INET */
 #ifdef INET6
 				case AF_INET6:
 					m_copyback(m, off,
 					    sizeof(struct icmp6_hdr),
 					    (caddr_t )pd->hdr.icmp6);
 					m_copyback(m, ipoff2, sizeof(h2_6),
 					    (caddr_t )&h2_6);
 					break;
 #endif /* INET6 */
 				}
 			}
 			return (PF_PASS);
 			break;
 		}
 		}
 	}
 }
 
 /*
  * Match a packet of a protocol without ports (neither TCP, UDP nor ICMP
  * handling applies) against the state table.
  *
  * The lookup key uses the packet's addresses with both ports set to 0.
  * After STATE_LOOKUP the single/multiple peer states are bumped, the
  * expiry and timeout are refreshed and, when the state's wire and stack
  * keys differ, the packet's source/destination addresses are rewritten to
  * the ones recorded in the state key.
  *
  * Returns PF_PASS on every path visible in this function.  The 'm'
  * argument is not referenced here.
  * NOTE(review): STATE_LOOKUP is a macro defined elsewhere and may return
  * from this function on its own -- confirm against its definition.
  */
 static int
 pf_test_state_other(struct pf_state **state, int direction, struct pfi_kif *kif,
     struct mbuf *m, struct pf_pdesc *pd)
 {
 	struct pf_state_peer	*src, *dst;
 	struct pf_state_key_cmp	 key;
 
 	bzero(&key, sizeof(key));
 	key.af = pd->af;
 	key.proto = pd->proto;
 	if (direction == PF_IN)	{	/* wire side, straight */
 		PF_ACPY(&key.addr[0], pd->src, key.af);
 		PF_ACPY(&key.addr[1], pd->dst, key.af);
 		key.port[0] = key.port[1] = 0;
 	} else {			/* stack side, reverse */
 		PF_ACPY(&key.addr[1], pd->src, key.af);
 		PF_ACPY(&key.addr[0], pd->dst, key.af);
 		key.port[1] = key.port[0] = 0;
 	}
 
 	STATE_LOOKUP(kif, &key, direction, *state, pd);
 
 	/* src is the peer that sent this packet, dst the one receiving it */
 	if (direction == (*state)->direction) {
 		src = &(*state)->src;
 		dst = &(*state)->dst;
 	} else {
 		src = &(*state)->dst;
 		dst = &(*state)->src;
 	}
 
 	/* update states */
 	if (src->state < PFOTHERS_SINGLE)
 		src->state = PFOTHERS_SINGLE;
 	if (dst->state == PFOTHERS_SINGLE)
 		dst->state = PFOTHERS_MULTIPLE;
 
 	/* update expire time */
 	(*state)->expire = time_uptime;
 	if (src->state == PFOTHERS_MULTIPLE && dst->state == PFOTHERS_MULTIPLE)
 		(*state)->timeout = PFTM_OTHER_MULTIPLE;
 	else
 		(*state)->timeout = PFTM_OTHER_SINGLE;
 
 	/* translate source/destination address, if necessary */
 	if ((*state)->key[PF_SK_WIRE] != (*state)->key[PF_SK_STACK]) {
 		struct pf_state_key *nk = (*state)->key[pd->didx];
 
 		KASSERT(nk, ("%s: nk is null", __func__));
 		KASSERT(pd, ("%s: pd is null", __func__));
 		KASSERT(pd->src, ("%s: pd->src is null", __func__));
 		KASSERT(pd->dst, ("%s: pd->dst is null", __func__));
 		switch (pd->af) {
 #ifdef INET
 		case AF_INET:
 			/* IPv4: patch the address and the IP header checksum */
 			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET))
 				pf_change_a(&pd->src->v4.s_addr,
 				    pd->ip_sum,
 				    nk->addr[pd->sidx].v4.s_addr,
 				    0);
 
 			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET))
 				pf_change_a(&pd->dst->v4.s_addr,
 				    pd->ip_sum,
 				    nk->addr[pd->didx].v4.s_addr,
 				    0);
 
 			break;
 #endif /* INET */
 #ifdef INET6
 		case AF_INET6:
 			/*
 			 * IPv6: plain address copy.  The comparison is made
 			 * with the family of this case (AF_INET6), matching
 			 * the family used for the copy.
 			 */
 			if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], AF_INET6))
 				PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
 
 			if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], AF_INET6))
 				PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
 
 			break;
 #endif /* INET6 */
 		}
 	}
 	return (PF_PASS);
 }
 
 /*
  * Copy len bytes found at offset off of the mbuf chain m into the
  * caller-supplied buffer p.  off is measured from the start of the chain;
  * the IP or IPv6 header is read from the start of the chain via mtod().
  *
  * Returns p on success.  Returns NULL without copying when:
  *  - (AF_INET) the packet is a non-first fragment: the action is set to
  *    PF_PASS if the fragment offset is at or beyond len, otherwise to
  *    PF_DROP with reason PFRES_FRAG;
  *  - the requested range reaches past the mbuf packet length or past the
  *    length recorded in the IP/IPv6 header: PF_DROP with PFRES_SHORT.
  * For any other address family the bytes are copied without these checks.
  *
  * NOTE(review): callers in this file pass NULL for actionp; ACTION_SET and
  * REASON_SET are macros defined elsewhere and presumably tolerate NULL --
  * confirm.
  */
 void *
 pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
     u_short *actionp, u_short *reasonp, sa_family_t af)
 {
 #ifdef INET
 	if (af == AF_INET) {
 		struct ip	*ip4 = mtod(m, struct ip *);
 		u_int16_t	 fragoff;
 
 		/* fragment offset of this packet, converted to bytes */
 		fragoff = (ntohs(ip4->ip_off) & IP_OFFMASK) << 3;
 		if (fragoff) {
 			if (fragoff < len) {
 				ACTION_SET(actionp, PF_DROP);
 				REASON_SET(reasonp, PFRES_FRAG);
 			} else
 				ACTION_SET(actionp, PF_PASS);
 			return (NULL);
 		}
 		if (m->m_pkthdr.len < off + len ||
 		    ntohs(ip4->ip_len) < off + len) {
 			ACTION_SET(actionp, PF_DROP);
 			REASON_SET(reasonp, PFRES_SHORT);
 			return (NULL);
 		}
 	}
 #endif /* INET */
 #ifdef INET6
 	if (af == AF_INET6) {
 		struct ip6_hdr	*ip6 = mtod(m, struct ip6_hdr *);
 
 		/* ip6_plen is added to the fixed header size for this check */
 		if (m->m_pkthdr.len < off + len ||
 		    (ntohs(ip6->ip6_plen) + sizeof(struct ip6_hdr)) <
 		    (unsigned)(off + len)) {
 			ACTION_SET(actionp, PF_DROP);
 			REASON_SET(reasonp, PFRES_SHORT);
 			return (NULL);
 		}
 	}
 #endif /* INET6 */
 
 	m_copydata(m, off, len, p);
 	return (p);
 }
 
 /*
  * Route check for addr using a full route lookup (struct route and
  * rtentry), optionally walking the multipath chain.
  *
  * Returns 1 when:
  *  - addr is an IPv6 address with an embedded interface scope, or
  *  - kif is an IFT_ENC interface, or
  *  - a route exists and kif is NULL (plain "is there a route" check), or
  *  - a route exists whose interface is kif->pfik_ifp; additional
  *    multipath routes are tried while check_mpath is set.
  * Returns 0 for an address family other than AF_INET/AF_INET6, when no
  * route is found, when kif->pfik_ifp is NULL, or when no examined route
  * uses kif's interface.
  *
  * NOTE(review): kif->pfik_ifp is dereferenced for the IFT_ENC test before
  * the later "kif->pfik_ifp == NULL" check -- confirm it cannot be NULL
  * at that point.
  */
-int
-pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
+#ifdef RADIX_MPATH
+static int
+pf_routable_oldmpath(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
     int rtableid)
 {
-#ifdef RADIX_MPATH
 	struct radix_node_head	*rnh;
-#endif
 	struct sockaddr_in	*dst;
 	int			 ret = 1;
 	int			 check_mpath;
 #ifdef INET6
 	struct sockaddr_in6	*dst6;
 	struct route_in6	 ro;
 #else
 	struct route		 ro;
 #endif
 	struct radix_node	*rn;
 	struct rtentry		*rt;
 	struct ifnet		*ifp;
 
 	check_mpath = 0;
-#ifdef RADIX_MPATH
 	/* XXX: stick to table 0 for now */
 	rnh = rt_tables_get_rnh(0, af);
 	if (rnh != NULL && rn_mpath_capable(rnh))
 		check_mpath = 1;
-#endif
 	/* build the lookup destination inside the zeroed route structure */
 	bzero(&ro, sizeof(ro));
 	switch (af) {
 	case AF_INET:
 		dst = satosin(&ro.ro_dst);
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = addr->v4;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		/*
 		 * Skip check for addresses with embedded interface scope,
 		 * as they would always match anyway.
 		 */
 		if (IN6_IS_SCOPE_EMBED(&addr->v6))
 			goto out;
 		dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
 		dst6->sin6_family = AF_INET6;
 		dst6->sin6_len = sizeof(*dst6);
 		dst6->sin6_addr = addr->v6;
 		break;
 #endif /* INET6 */
 	default:
 		return (0);
 	}
 
 	/* Skip checks for ipsec interfaces */
 	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
 		goto out;
 
 	switch (af) {
 #ifdef INET6
 	case AF_INET6:
 		in6_rtalloc_ign(&ro, 0, rtableid);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		in_rtalloc_ign((struct route *)&ro, 0, rtableid);
 		break;
 #endif
 	}
 
 	if (ro.ro_rt != NULL) {
 		/* No interface given, this is a no-route check */
 		if (kif == NULL)
 			goto out;
 
 		if (kif->pfik_ifp == NULL) {
 			ret = 0;
 			goto out;
 		}
 
 		/* Perform uRPF check if passed input interface */
 		ret = 0;
 		rn = (struct radix_node *)ro.ro_rt;
 		/* stop at the first route whose interface is kif's */
 		do {
 			rt = (struct rtentry *)rn;
 			ifp = rt->rt_ifp;
 
 			if (kif->pfik_ifp == ifp)
 				ret = 1;
-#ifdef RADIX_MPATH
 			rn = rn_mpath_next(rn);
-#endif
 		} while (check_mpath == 1 && rn != NULL && ret == 0);
 	} else
 		ret = 0;
 out:
 	/* release the route reference, if a lookup produced one */
 	if (ro.ro_rt != NULL)
 		RTFREE(ro.ro_rt);
 	return (ret);
 }
+#endif
 
+int
+pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
+    int rtableid)
+{
 #ifdef INET
+	struct nhop4_basic	nh4;
+#endif
+#ifdef INET6
+	struct nhop6_basic	nh6;
+#endif
+	struct ifnet		*ifp;
+#ifdef RADIX_MPATH
+	struct radix_node_head	*rnh;
+
+	/* XXX: stick to table 0 for now */
+	rnh = rt_tables_get_rnh(0, af);
+	if (rnh != NULL && rn_mpath_capable(rnh))
+		return (pf_routable_oldmpath(addr, af, kif, rtableid));
+#endif
+	/*
+	 * Skip check for addresses with embedded interface scope,
+	 * as they would always match anyway.
+	 */
+	if (af == AF_INET6 && IN6_IS_SCOPE_EMBED(&addr->v6))
+		return (1);
+
+	if (af != AF_INET && af != AF_INET6)
+		return (0);
+
+	/* Skip checks for ipsec interfaces */
+	if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
+		return (1);
+
+	ifp = NULL;
+
+	switch (af) {
+#ifdef INET6
+	case AF_INET6:
+		if (fib6_lookup_nh_basic(rtableid, &addr->v6, 0, 0, 0, &nh6)!=0)
+			return (0);
+		ifp = nh6.nh_ifp;
+		break;
+#endif
+#ifdef INET
+	case AF_INET:
+		if (fib4_lookup_nh_basic(rtableid, addr->v4, 0, 0, &nh4) != 0)
+			return (0);
+		ifp = nh4.nh_ifp;
+		break;
+#endif
+	}
+
+	/* No interface given, this is a no-route check */
+	if (kif == NULL)
+		return (1);
+
+	if (kif->pfik_ifp == NULL)
+		return (0);
+
+	/* Perform uRPF check if passed input interface */
+	if (kif->pfik_ifp == ifp)
+		return (1);
+	return (0);
+}
+
+#ifdef INET
 static void
 pf_route(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
     struct pf_state *s, struct pf_pdesc *pd)
 {
 	struct mbuf		*m0, *m1;
 	struct sockaddr_in	dst;
 	struct ip		*ip;
 	struct ifnet		*ifp = NULL;
 	struct pf_addr		 naddr;
 	struct pf_src_node	*sn = NULL;
 	int			 error = 0;
 	uint16_t		 ip_len, ip_off;
 
 	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
 	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
 	    __func__));
 
 	if ((pd->pf_mtag == NULL &&
 	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
 	    pd->pf_mtag->routed++ > 3) {
 		m0 = *m;
 		*m = NULL;
 		goto bad_locked;
 	}
 
 	if (r->rt == PF_DUPTO) {
 		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
 			if (s)
 				PF_STATE_UNLOCK(s);
 			return;
 		}
 	} else {
 		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
 			if (s)
 				PF_STATE_UNLOCK(s);
 			return;
 		}
 		m0 = *m;
 	}
 
 	ip = mtod(m0, struct ip *);
 
 	bzero(&dst, sizeof(dst));
 	dst.sin_family = AF_INET;
 	dst.sin_len = sizeof(dst);
 	dst.sin_addr = ip->ip_dst;
 
 	if (r->rt == PF_FASTROUTE) {
-		struct rtentry *rt;
+		struct nhop4_basic nh4;
 
 		if (s)
 			PF_STATE_UNLOCK(s);
-		rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0));
-		if (rt == NULL) {
+
+		if (fib4_lookup_nh_basic(M_GETFIB(m0), ip->ip_dst, 0,
+		    m0->m_pkthdr.flowid, &nh4) != 0) {
 			KMOD_IPSTAT_INC(ips_noroute);
 			error = EHOSTUNREACH;
 			goto bad;
 		}
 
-		ifp = rt->rt_ifp;
-		counter_u64_add(rt->rt_pksent, 1);
-
-		if (rt->rt_flags & RTF_GATEWAY)
-			bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst));
-		RTFREE_LOCKED(rt);
+		ifp = nh4.nh_ifp;
+		dst.sin_addr = nh4.nh_addr;
 	} else {
 		if (TAILQ_EMPTY(&r->rpool.list)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
 			goto bad_locked;
 		}
 		if (s == NULL) {
 			pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src,
 			    &naddr, NULL, &sn);
 			if (!PF_AZERO(&naddr, AF_INET))
 				dst.sin_addr.s_addr = naddr.v4.s_addr;
 			ifp = r->rpool.cur->kif ?
 			    r->rpool.cur->kif->pfik_ifp : NULL;
 		} else {
 			if (!PF_AZERO(&s->rt_addr, AF_INET))
 				dst.sin_addr.s_addr =
 				    s->rt_addr.v4.s_addr;
 			ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
 			PF_STATE_UNLOCK(s);
 		}
 	}
 	if (ifp == NULL)
 		goto bad;
 
 	if (oifp != ifp) {
 		if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS)
 			goto bad;
 		else if (m0 == NULL)
 			goto done;
 		if (m0->m_len < sizeof(struct ip)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("%s: m0->m_len < sizeof(struct ip)\n", __func__));
 			goto bad;
 		}
 		ip = mtod(m0, struct ip *);
 	}
 
 	if (ifp->if_flags & IFF_LOOPBACK)
 		m0->m_flags |= M_SKIP_FIREWALL;
 
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
 	/* Copied from FreeBSD 10.0-CURRENT ip_output. */
 	m0->m_pkthdr.csum_flags |= CSUM_IP;
 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~ifp->if_hwassist) {
 		in_delayed_cksum(m0);
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
 	}
 #ifdef SCTP
 	if (m0->m_pkthdr.csum_flags & CSUM_SCTP & ~ifp->if_hwassist) {
 		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
 		m0->m_pkthdr.csum_flags &= ~CSUM_SCTP;
 	}
 #endif
 
 	/*
 	 * If small enough for interface, or the interface will take
 	 * care of the fragmentation for us, we can just send directly.
 	 */
 	if (ip_len <= ifp->if_mtu ||
 	    (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0) {
 		ip->ip_sum = 0;
 		if (m0->m_pkthdr.csum_flags & CSUM_IP & ~ifp->if_hwassist) {
 			ip->ip_sum = in_cksum(m0, ip->ip_hl << 2);
 			m0->m_pkthdr.csum_flags &= ~CSUM_IP;
 		}
 		m_clrprotoflags(m0);	/* Avoid confusing lower layers. */
 		error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
 		goto done;
 	}
 
 	/* Balk when DF bit is set or the interface didn't support TSO. */
 	if ((ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) {
 		error = EMSGSIZE;
 		KMOD_IPSTAT_INC(ips_cantfrag);
 		if (r->rt != PF_DUPTO) {
 			icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0,
 			    ifp->if_mtu);
 			goto done;
 		} else
 			goto bad;
 	}
 
 	error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist);
 	if (error)
 		goto bad;
 
 	for (; m0; m0 = m1) {
 		m1 = m0->m_nextpkt;
 		m0->m_nextpkt = NULL;
 		if (error == 0) {
 			m_clrprotoflags(m0);
 			error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL);
 		} else
 			m_freem(m0);
 	}
 
 	if (error == 0)
 		KMOD_IPSTAT_INC(ips_fragmented);
 
 done:
 	if (r->rt != PF_DUPTO)
 		*m = NULL;
 	return;
 
 bad_locked:
 	if (s)
 		PF_STATE_UNLOCK(s);
 bad:
 	m_freem(m0);
 	goto done;
 }
 #endif /* INET */
 
 #ifdef INET6
 static void
 pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
     struct pf_state *s, struct pf_pdesc *pd)
 {
 	struct mbuf		*m0;
 	struct sockaddr_in6	dst;
 	struct ip6_hdr		*ip6;
 	struct ifnet		*ifp = NULL;
 	struct pf_addr		 naddr;
 	struct pf_src_node	*sn = NULL;
 
 	KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__));
 	KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction",
 	    __func__));
 
 	if ((pd->pf_mtag == NULL &&
 	    ((pd->pf_mtag = pf_get_mtag(*m)) == NULL)) ||
 	    pd->pf_mtag->routed++ > 3) {
 		m0 = *m;
 		*m = NULL;
 		goto bad_locked;
 	}
 
 	if (r->rt == PF_DUPTO) {
 		if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) {
 			if (s)
 				PF_STATE_UNLOCK(s);
 			return;
 		}
 	} else {
 		if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
 			if (s)
 				PF_STATE_UNLOCK(s);
 			return;
 		}
 		m0 = *m;
 	}
 
 	ip6 = mtod(m0, struct ip6_hdr *);
 
 	bzero(&dst, sizeof(dst));
 	dst.sin6_family = AF_INET6;
 	dst.sin6_len = sizeof(dst);
 	dst.sin6_addr = ip6->ip6_dst;
 
 	/* Cheat. XXX why only in the v6 case??? */
 	if (r->rt == PF_FASTROUTE) {
 		if (s)
 			PF_STATE_UNLOCK(s);
 		m0->m_flags |= M_SKIP_FIREWALL;
 		ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
 		*m = NULL;
 		return;
 	}
 
 	if (TAILQ_EMPTY(&r->rpool.list)) {
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__));
 		goto bad_locked;
 	}
 	if (s == NULL) {
 		pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src,
 		    &naddr, NULL, &sn);
 		if (!PF_AZERO(&naddr, AF_INET6))
 			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
 			    &naddr, AF_INET6);
 		ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL;
 	} else {
 		if (!PF_AZERO(&s->rt_addr, AF_INET6))
 			PF_ACPY((struct pf_addr *)&dst.sin6_addr,
 			    &s->rt_addr, AF_INET6);
 		ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
 	}
 
 	if (s)
 		PF_STATE_UNLOCK(s);
 
 	if (ifp == NULL)
 		goto bad;
 
 	if (oifp != ifp) {
 		if (pf_test6(PF_FWD, ifp, &m0, NULL) != PF_PASS)
 			goto bad;
 		else if (m0 == NULL)
 			goto done;
 		if (m0->m_len < sizeof(struct ip6_hdr)) {
 			DPFPRINTF(PF_DEBUG_URGENT,
 			    ("%s: m0->m_len < sizeof(struct ip6_hdr)\n",
 			    __func__));
 			goto bad;
 		}
 		ip6 = mtod(m0, struct ip6_hdr *);
 	}
 
 	if (ifp->if_flags & IFF_LOOPBACK)
 		m0->m_flags |= M_SKIP_FIREWALL;
 
 	if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
 	    ~ifp->if_hwassist) {
 		uint32_t plen = m0->m_pkthdr.len - sizeof(*ip6);
 		in6_delayed_cksum(m0, plen, sizeof(struct ip6_hdr));
 		m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
 	}
 
 	/*
 	 * If the packet is too large for the outgoing interface,
 	 * send back an icmp6 error.
 	 */
 	if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
 		dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
 	if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu)
 		nd6_output_ifp(ifp, ifp, m0, &dst, NULL);
 	else {
 		in6_ifstat_inc(ifp, ifs6_in_toobig);
 		if (r->rt != PF_DUPTO)
 			icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu);
 		else
 			goto bad;
 	}
 
 done:
 	if (r->rt != PF_DUPTO)
 		*m = NULL;
 	return;
 
 bad_locked:
 	if (s)
 		PF_STATE_UNLOCK(s);
 bad:
 	m_freem(m0);
 	goto done;
 }
 #endif /* INET6 */
 
 /*
  * FreeBSD supports cksum offloads for the following drivers.
  *  em(4), fxp(4), ixgb(4), lge(4), ndis(4), nge(4), re(4),
  *   ti(4), txp(4), xl(4)
  *
  * CSUM_DATA_VALID | CSUM_PSEUDO_HDR :
  *  network driver performed cksum including pseudo header, need to verify
  *   csum_data
  * CSUM_DATA_VALID :
  *  network driver performed cksum, needs to additional pseudo header
  *  cksum computation with partial csum_data(i.e. lack of H/W support for
  *  pseudo header, for instance hme(4), sk(4) and possibly gem(4))
  *
  * After validating the cksum of packet, set both flag CSUM_DATA_VALID and
  * CSUM_PSEUDO_HDR in order to avoid recomputation of the cksum in upper
  * TCP/UDP layer.
  * Also, set csum_data to 0xffff to force cksum validation.
  */
 static int
 pf_check_proto_cksum(struct mbuf *m, int off, int len, u_int8_t p, sa_family_t af)
 {
 	u_int16_t sum = 0;
 	int hw_assist = 0;
 	struct ip *ip;
 
 	if (off < sizeof(struct ip) || len < sizeof(struct udphdr))
 		return (1);
 	if (m->m_pkthdr.len < off + len)
 		return (1);
 
 	switch (p) {
 	case IPPROTO_TCP:
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
 				sum = m->m_pkthdr.csum_data;
 			} else {
 				ip = mtod(m, struct ip *);
 				sum = in_pseudo(ip->ip_src.s_addr,
 				ip->ip_dst.s_addr, htonl((u_short)len +
 				m->m_pkthdr.csum_data + IPPROTO_TCP));
 			}
 			sum ^= 0xffff;
 			++hw_assist;
 		}
 		break;
 	case IPPROTO_UDP:
 		if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
 			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
 				sum = m->m_pkthdr.csum_data;
 			} else {
 				ip = mtod(m, struct ip *);
 				sum = in_pseudo(ip->ip_src.s_addr,
 				ip->ip_dst.s_addr, htonl((u_short)len +
 				m->m_pkthdr.csum_data + IPPROTO_UDP));
 			}
 			sum ^= 0xffff;
 			++hw_assist;
 		}
 		break;
 	case IPPROTO_ICMP:
 #ifdef INET6
 	case IPPROTO_ICMPV6:
 #endif /* INET6 */
 		break;
 	default:
 		return (1);
 	}
 
 	if (!hw_assist) {
 		switch (af) {
 		case AF_INET:
 			if (p == IPPROTO_ICMP) {
 				if (m->m_len < off)
 					return (1);
 				m->m_data += off;
 				m->m_len -= off;
 				sum = in_cksum(m, len);
 				m->m_data -= off;
 				m->m_len += off;
 			} else {
 				if (m->m_len < sizeof(struct ip))
 					return (1);
 				sum = in4_cksum(m, p, off, len);
 			}
 			break;
 #ifdef INET6
 		case AF_INET6:
 			if (m->m_len < sizeof(struct ip6_hdr))
 				return (1);
 			sum = in6_cksum(m, p, off, len);
 			break;
 #endif /* INET6 */
 		default:
 			return (1);
 		}
 	}
 	if (sum) {
 		switch (p) {
 		case IPPROTO_TCP:
 		    {
 			KMOD_TCPSTAT_INC(tcps_rcvbadsum);
 			break;
 		    }
 		case IPPROTO_UDP:
 		    {
 			KMOD_UDPSTAT_INC(udps_badsum);
 			break;
 		    }
 #ifdef INET
 		case IPPROTO_ICMP:
 		    {
 			KMOD_ICMPSTAT_INC(icps_checksum);
 			break;
 		    }
 #endif
 #ifdef INET6
 		case IPPROTO_ICMPV6:
 		    {
 			KMOD_ICMP6STAT_INC(icp6s_checksum);
 			break;
 		    }
 #endif /* INET6 */
 		}
 		return (1);
 	} else {
 		if (p == IPPROTO_TCP || p == IPPROTO_UDP) {
 			m->m_pkthdr.csum_flags |=
 			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 			m->m_pkthdr.csum_data = 0xffff;
 		}
 	}
 	return (0);
 }
 
 
 #ifdef INET
 int
 pf_test(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
 {
 	struct pfi_kif		*kif;
 	u_short			 action, reason = 0, log = 0;
 	struct mbuf		*m = *m0;
 	struct ip		*h = NULL;
 	struct m_tag		*ipfwtag;
 	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
 	struct pf_state		*s = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_pdesc		 pd;
 	int			 off, dirndx, pqid = 0;
 
 	M_ASSERTPKTHDR(m);
 
 	if (!V_pf_status.running)
 		return (PF_PASS);
 
 	memset(&pd, 0, sizeof(pd));
 
 	kif = (struct pfi_kif *)ifp->if_pf_kif;
 
 	if (kif == NULL) {
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_test: kif == NULL, if_xname %s\n", ifp->if_xname));
 		return (PF_DROP);
 	}
 	if (kif->pfik_flags & PFI_IFLAG_SKIP)
 		return (PF_PASS);
 
 	if (m->m_flags & M_SKIP_FIREWALL)
 		return (PF_PASS);
 
 	pd.pf_mtag = pf_find_mtag(m);
 
 	PF_RULES_RLOCK();
 
 	if (ip_divert_ptr != NULL &&
 	    ((ipfwtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL)) != NULL)) {
 		struct ipfw_rule_ref *rr = (struct ipfw_rule_ref *)(ipfwtag+1);
 		if (rr->info & IPFW_IS_DIVERT && rr->rulenum == 0) {
 			if (pd.pf_mtag == NULL &&
 			    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
 				action = PF_DROP;
 				goto done;
 			}
 			pd.pf_mtag->flags |= PF_PACKET_LOOPED;
 			m_tag_delete(m, ipfwtag);
 		}
 		if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
 			m->m_flags |= M_FASTFWD_OURS;
 			pd.pf_mtag->flags &= ~PF_FASTFWD_OURS_PRESENT;
 		}
 	} else if (pf_normalize_ip(m0, dir, kif, &reason, &pd) != PF_PASS) {
 		/* We do IP header normalization and packet reassembly here */
 		action = PF_DROP;
 		goto done;
 	}
 	m = *m0;	/* pf_normalize messes with m0 */
 	h = mtod(m, struct ip *);
 
 	off = h->ip_hl << 2;
 	if (off < (int)sizeof(struct ip)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_SHORT);
 		log = 1;
 		goto done;
 	}
 
 	pd.src = (struct pf_addr *)&h->ip_src;
 	pd.dst = (struct pf_addr *)&h->ip_dst;
 	pd.sport = pd.dport = NULL;
 	pd.ip_sum = &h->ip_sum;
 	pd.proto_sum = NULL;
 	pd.proto = h->ip_p;
 	pd.dir = dir;
 	pd.sidx = (dir == PF_IN) ? 0 : 1;
 	pd.didx = (dir == PF_IN) ? 1 : 0;
 	pd.af = AF_INET;
 	pd.tos = h->ip_tos;
 	pd.tot_len = ntohs(h->ip_len);
 
 	/* handle fragments that didn't get reassembled by normalization */
 	if (h->ip_off & htons(IP_MF | IP_OFFMASK)) {
 		action = pf_test_fragment(&r, dir, kif, m, h,
 		    &pd, &a, &ruleset);
 		goto done;
 	}
 
 	switch (h->ip_p) {
 
 	case IPPROTO_TCP: {
 		struct tcphdr	th;
 
 		pd.hdr.tcp = &th;
 		if (!pf_pull_hdr(m, off, &th, sizeof(th),
 		    &action, &reason, AF_INET)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		pd.p_len = pd.tot_len - off - (th.th_off << 2);
 		if ((th.th_flags & TH_ACK) && pd.p_len == 0)
 			pqid = 1;
 		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
 		if (action == PF_DROP)
 			goto done;
 		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
 		    &reason);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 	case IPPROTO_UDP: {
 		struct udphdr	uh;
 
 		pd.hdr.udp = &uh;
 		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
 		    &action, &reason, AF_INET)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		if (uh.uh_dport == 0 ||
 		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
 		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_SHORT);
 			goto done;
 		}
 		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 	case IPPROTO_ICMP: {
 		struct icmp	ih;
 
 		pd.hdr.icmp = &ih;
 		if (!pf_pull_hdr(m, off, &ih, ICMP_MINLEN,
 		    &action, &reason, AF_INET)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd,
 		    &reason);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 #ifdef INET6
 	case IPPROTO_ICMPV6: {
 		action = PF_DROP;
 		DPFPRINTF(PF_DEBUG_MISC,
 		    ("pf: dropping IPv4 packet with ICMPv6 payload\n"));
 		goto done;
 	}
 #endif
 
 	default:
 		action = pf_test_state_other(&s, dir, kif, m, &pd);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 done:
 	PF_RULES_RUNLOCK();
 	if (action == PF_PASS && h->ip_hl > 5 &&
 	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_IPOPTIONS);
 		log = r->log;
 		DPFPRINTF(PF_DEBUG_MISC,
 		    ("pf: dropping packet with ip options\n"));
 	}
 
 	if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_MEMORY);
 	}
 	if (r->rtableid >= 0)
 		M_SETFIB(m, r->rtableid);
 
 #ifdef ALTQ
 	if (action == PF_PASS && r->qid) {
 		if (pd.pf_mtag == NULL &&
 		    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_MEMORY);
 		} else {
 			if (s != NULL)
 				pd.pf_mtag->qid_hash = pf_state_hash(s);
 			if (pqid || (pd.tos & IPTOS_LOWDELAY))
 				pd.pf_mtag->qid = r->pqid;
 			else
 				pd.pf_mtag->qid = r->qid;
 			/* Add hints for ecn. */
 			pd.pf_mtag->hdr = h;
 		}
 
 	}
 #endif /* ALTQ */
 
 	/*
 	 * connections redirected to loopback should not match sockets
 	 * bound specifically to loopback due to security implications,
 	 * see tcp_input() and in_pcblookup_listen().
 	 */
 	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
 	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
 	    (s->nat_rule.ptr->action == PF_RDR ||
 	    s->nat_rule.ptr->action == PF_BINAT) &&
 	    (ntohl(pd.dst->v4.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
 		m->m_flags |= M_SKIP_FIREWALL;
 
 	if (action == PF_PASS && r->divert.port && ip_divert_ptr != NULL &&
 	    !PACKET_LOOPED(&pd)) {
 
 		ipfwtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
 		    sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
 		if (ipfwtag != NULL) {
 			((struct ipfw_rule_ref *)(ipfwtag+1))->info =
 			    ntohs(r->divert.port);
 			((struct ipfw_rule_ref *)(ipfwtag+1))->rulenum = dir;
 
 			if (s)
 				PF_STATE_UNLOCK(s);
 
 			m_tag_prepend(m, ipfwtag);
 			if (m->m_flags & M_FASTFWD_OURS) {
 				if (pd.pf_mtag == NULL &&
 				    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
 					action = PF_DROP;
 					REASON_SET(&reason, PFRES_MEMORY);
 					log = 1;
 					DPFPRINTF(PF_DEBUG_MISC,
 					    ("pf: failed to allocate tag\n"));
 				} else {
 					pd.pf_mtag->flags |=
 					    PF_FASTFWD_OURS_PRESENT;
 					m->m_flags &= ~M_FASTFWD_OURS;
 				}
 			}
 			ip_divert_ptr(*m0, dir ==  PF_IN ? DIR_IN : DIR_OUT);
 			*m0 = NULL;
 
 			return (action);
 		} else {
 			/* XXX: ipfw has the same behaviour! */
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_MEMORY);
 			log = 1;
 			DPFPRINTF(PF_DEBUG_MISC,
 			    ("pf: failed to allocate divert tag\n"));
 		}
 	}
 
 	if (log) {
 		struct pf_rule *lr;
 
 		if (s != NULL && s->nat_rule.ptr != NULL &&
 		    s->nat_rule.ptr->log & PF_LOG_ALL)
 			lr = s->nat_rule.ptr;
 		else
 			lr = r;
 		PFLOG_PACKET(kif, m, AF_INET, dir, reason, lr, a, ruleset, &pd,
 		    (s == NULL));
 	}
 
 	kif->pfik_bytes[0][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
 	kif->pfik_packets[0][dir == PF_OUT][action != PF_PASS]++;
 
 	if (action == PF_PASS || r->action == PF_DROP) {
 		dirndx = (dir == PF_OUT);
 		r->packets[dirndx]++;
 		r->bytes[dirndx] += pd.tot_len;
 		if (a != NULL) {
 			a->packets[dirndx]++;
 			a->bytes[dirndx] += pd.tot_len;
 		}
 		if (s != NULL) {
 			if (s->nat_rule.ptr != NULL) {
 				s->nat_rule.ptr->packets[dirndx]++;
 				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->src_node != NULL) {
 				s->src_node->packets[dirndx]++;
 				s->src_node->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->nat_src_node != NULL) {
 				s->nat_src_node->packets[dirndx]++;
 				s->nat_src_node->bytes[dirndx] += pd.tot_len;
 			}
 			dirndx = (dir == s->direction) ? 0 : 1;
 			s->packets[dirndx]++;
 			s->bytes[dirndx] += pd.tot_len;
 		}
 		tr = r;
 		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
 		if (nr != NULL && r == &V_pf_default_rule)
 			tr = nr;
 		if (tr->src.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->src.addr.p.tbl,
 			    (s == NULL) ? pd.src :
 			    &s->key[(s->direction == PF_IN)]->
 				addr[(s->direction == PF_OUT)],
 			    pd.af, pd.tot_len, dir == PF_OUT,
 			    r->action == PF_PASS, tr->src.neg);
 		if (tr->dst.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->dst.addr.p.tbl,
 			    (s == NULL) ? pd.dst :
 			    &s->key[(s->direction == PF_IN)]->
 				addr[(s->direction == PF_IN)],
 			    pd.af, pd.tot_len, dir == PF_OUT,
 			    r->action == PF_PASS, tr->dst.neg);
 	}
 
 	switch (action) {
 	case PF_SYNPROXY_DROP:
 		m_freem(*m0);
 	case PF_DEFER:
 		*m0 = NULL;
 		action = PF_PASS;
 		break;
 	case PF_DROP:
 		m_freem(*m0);
 		*m0 = NULL;
 		break;
 	default:
 		/* pf_route() returns unlocked. */
 		if (r->rt) {
 			pf_route(m0, r, dir, kif->pfik_ifp, s, &pd);
 			return (action);
 		}
 		break;
 	}
 	if (s)
 		PF_STATE_UNLOCK(s);
 
 	return (action);
 }
 #endif /* INET */
 
 #ifdef INET6
 int
 pf_test6(int dir, struct ifnet *ifp, struct mbuf **m0, struct inpcb *inp)
 {
 	struct pfi_kif		*kif;
 	u_short			 action, reason = 0, log = 0;
 	struct mbuf		*m = *m0, *n = NULL;
 	struct m_tag		*mtag;
 	struct ip6_hdr		*h = NULL;
 	struct pf_rule		*a = NULL, *r = &V_pf_default_rule, *tr, *nr;
 	struct pf_state		*s = NULL;
 	struct pf_ruleset	*ruleset = NULL;
 	struct pf_pdesc		 pd;
 	int			 off, terminal = 0, dirndx, rh_cnt = 0;
 	int			 fwdir = dir;
 
 	M_ASSERTPKTHDR(m);
 
 	/* Detect packet forwarding.
 	 * If the input interface is different from the output interface we're
 	 * forwarding.
 	 * We do need to be careful about bridges. If the
 	 * net.link.bridge.pfil_bridge sysctl is set we can be filtering on a
 	 * bridge, so if the input interface is a bridge member and the output
 	 * interface is its bridge we're not actually forwarding but bridging.
 	 */
 	if (dir == PF_OUT && m->m_pkthdr.rcvif && ifp != m->m_pkthdr.rcvif
 	    && (m->m_pkthdr.rcvif->if_bridge == NULL
 	        || m->m_pkthdr.rcvif->if_bridge != ifp->if_softc))
 		fwdir = PF_FWD;
 
 	if (!V_pf_status.running)
 		return (PF_PASS);
 
 	memset(&pd, 0, sizeof(pd));
 	pd.pf_mtag = pf_find_mtag(m);
 
 	if (pd.pf_mtag && pd.pf_mtag->flags & PF_TAG_GENERATED)
 		return (PF_PASS);
 
 	kif = (struct pfi_kif *)ifp->if_pf_kif;
 	if (kif == NULL) {
 		DPFPRINTF(PF_DEBUG_URGENT,
 		    ("pf_test6: kif == NULL, if_xname %s\n", ifp->if_xname));
 		return (PF_DROP);
 	}
 	if (kif->pfik_flags & PFI_IFLAG_SKIP)
 		return (PF_PASS);
 
 	if (m->m_flags & M_SKIP_FIREWALL)
 		return (PF_PASS);
 
 	PF_RULES_RLOCK();
 
 	/* We do IP header normalization and packet reassembly here */
 	if (pf_normalize_ip6(m0, dir, kif, &reason, &pd) != PF_PASS) {
 		action = PF_DROP;
 		goto done;
 	}
 	m = *m0;	/* pf_normalize messes with m0 */
 	h = mtod(m, struct ip6_hdr *);
 
 #if 1
 	/*
 	 * we do not support jumbogram yet.  if we keep going, zero ip6_plen
 	 * will do something bad, so drop the packet for now.
 	 */
 	if (htons(h->ip6_plen) == 0) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_NORM);	/*XXX*/
 		goto done;
 	}
 #endif
 
 	pd.src = (struct pf_addr *)&h->ip6_src;
 	pd.dst = (struct pf_addr *)&h->ip6_dst;
 	pd.sport = pd.dport = NULL;
 	pd.ip_sum = NULL;
 	pd.proto_sum = NULL;
 	pd.dir = dir;
 	pd.sidx = (dir == PF_IN) ? 0 : 1;
 	pd.didx = (dir == PF_IN) ? 1 : 0;
 	pd.af = AF_INET6;
 	pd.tos = 0;
 	pd.tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
 
 	off = ((caddr_t)h - m->m_data) + sizeof(struct ip6_hdr);
 	pd.proto = h->ip6_nxt;
 	do {
 		switch (pd.proto) {
 		case IPPROTO_FRAGMENT:
 			action = pf_test_fragment(&r, dir, kif, m, h,
 			    &pd, &a, &ruleset);
 			if (action == PF_DROP)
 				REASON_SET(&reason, PFRES_FRAG);
 			goto done;
 		case IPPROTO_ROUTING: {
 			struct ip6_rthdr rthdr;
 
 			if (rh_cnt++) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 more than one rthdr\n"));
 				action = PF_DROP;
 				REASON_SET(&reason, PFRES_IPOPTIONS);
 				log = 1;
 				goto done;
 			}
 			if (!pf_pull_hdr(m, off, &rthdr, sizeof(rthdr), NULL,
 			    &reason, pd.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 short rthdr\n"));
 				action = PF_DROP;
 				REASON_SET(&reason, PFRES_SHORT);
 				log = 1;
 				goto done;
 			}
 			if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 rthdr0\n"));
 				action = PF_DROP;
 				REASON_SET(&reason, PFRES_IPOPTIONS);
 				log = 1;
 				goto done;
 			}
 			/* FALLTHROUGH */
 		}
 		case IPPROTO_AH:
 		case IPPROTO_HOPOPTS:
 		case IPPROTO_DSTOPTS: {
 			/* get next header and header length */
 			struct ip6_ext	opt6;
 
 			if (!pf_pull_hdr(m, off, &opt6, sizeof(opt6),
 			    NULL, &reason, pd.af)) {
 				DPFPRINTF(PF_DEBUG_MISC,
 				    ("pf: IPv6 short opt\n"));
 				action = PF_DROP;
 				log = 1;
 				goto done;
 			}
 			if (pd.proto == IPPROTO_AH)
 				off += (opt6.ip6e_len + 2) * 4;
 			else
 				off += (opt6.ip6e_len + 1) * 8;
 			pd.proto = opt6.ip6e_nxt;
 			/* goto the next header */
 			break;
 		}
 		default:
 			terminal++;
 			break;
 		}
 	} while (!terminal);
 
 	/* if there's no routing header, use unmodified mbuf for checksumming */
 	if (!n)
 		n = m;
 
 	switch (pd.proto) {
 
 	case IPPROTO_TCP: {
 		struct tcphdr	th;
 
 		pd.hdr.tcp = &th;
 		if (!pf_pull_hdr(m, off, &th, sizeof(th),
 		    &action, &reason, AF_INET6)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		pd.p_len = pd.tot_len - off - (th.th_off << 2);
 		action = pf_normalize_tcp(dir, kif, m, 0, off, h, &pd);
 		if (action == PF_DROP)
 			goto done;
 		action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd,
 		    &reason);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 	case IPPROTO_UDP: {
 		struct udphdr	uh;
 
 		pd.hdr.udp = &uh;
 		if (!pf_pull_hdr(m, off, &uh, sizeof(uh),
 		    &action, &reason, AF_INET6)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		if (uh.uh_dport == 0 ||
 		    ntohs(uh.uh_ulen) > m->m_pkthdr.len - off ||
 		    ntohs(uh.uh_ulen) < sizeof(struct udphdr)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_SHORT);
 			goto done;
 		}
 		action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 	case IPPROTO_ICMP: {
 		action = PF_DROP;
 		DPFPRINTF(PF_DEBUG_MISC,
 		    ("pf: dropping IPv6 packet with ICMPv4 payload\n"));
 		goto done;
 	}
 
 	case IPPROTO_ICMPV6: {
 		struct icmp6_hdr	ih;
 
 		pd.hdr.icmp6 = &ih;
 		if (!pf_pull_hdr(m, off, &ih, sizeof(ih),
 		    &action, &reason, AF_INET6)) {
 			log = action != PF_PASS;
 			goto done;
 		}
 		action = pf_test_state_icmp(&s, dir, kif,
 		    m, off, h, &pd, &reason);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 	default:
 		action = pf_test_state_other(&s, dir, kif, m, &pd);
 		if (action == PF_PASS) {
 			if (pfsync_update_state_ptr != NULL)
 				pfsync_update_state_ptr(s);
 			r = s->rule.ptr;
 			a = s->anchor.ptr;
 			log = s->log;
 		} else if (s == NULL)
 			action = pf_test_rule(&r, &s, dir, kif, m, off, &pd,
 			    &a, &ruleset, inp);
 		break;
 	}
 
 done:
 	PF_RULES_RUNLOCK();
 	if (n != m) {
 		m_freem(n);
 		n = NULL;
 	}
 
 	/* handle dangerous IPv6 extension headers. */
 	if (action == PF_PASS && rh_cnt &&
 	    !((s && s->state_flags & PFSTATE_ALLOWOPTS) || r->allow_opts)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_IPOPTIONS);
 		log = r->log;
 		DPFPRINTF(PF_DEBUG_MISC,
 		    ("pf: dropping packet with dangerous v6 headers\n"));
 	}
 
 	if (s && s->tag > 0 && pf_tag_packet(m, &pd, s->tag)) {
 		action = PF_DROP;
 		REASON_SET(&reason, PFRES_MEMORY);
 	}
 	if (r->rtableid >= 0)
 		M_SETFIB(m, r->rtableid);
 
 #ifdef ALTQ
 	if (action == PF_PASS && r->qid) {
 		if (pd.pf_mtag == NULL &&
 		    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
 			action = PF_DROP;
 			REASON_SET(&reason, PFRES_MEMORY);
 		} else {
 			if (s != NULL)
 				pd.pf_mtag->qid_hash = pf_state_hash(s);
 			if (pd.tos & IPTOS_LOWDELAY)
 				pd.pf_mtag->qid = r->pqid;
 			else
 				pd.pf_mtag->qid = r->qid;
 			/* Add hints for ecn. */
 			pd.pf_mtag->hdr = h;
 		}
 	}
 #endif /* ALTQ */
 
 	if (dir == PF_IN && action == PF_PASS && (pd.proto == IPPROTO_TCP ||
 	    pd.proto == IPPROTO_UDP) && s != NULL && s->nat_rule.ptr != NULL &&
 	    (s->nat_rule.ptr->action == PF_RDR ||
 	    s->nat_rule.ptr->action == PF_BINAT) &&
 	    IN6_IS_ADDR_LOOPBACK(&pd.dst->v6))
 		m->m_flags |= M_SKIP_FIREWALL;
 
 	/* XXX: Anybody working on it?! */
 	if (r->divert.port)
 		printf("pf: divert(9) is not supported for IPv6\n");
 
 	if (log) {
 		struct pf_rule *lr;
 
 		if (s != NULL && s->nat_rule.ptr != NULL &&
 		    s->nat_rule.ptr->log & PF_LOG_ALL)
 			lr = s->nat_rule.ptr;
 		else
 			lr = r;
 		PFLOG_PACKET(kif, m, AF_INET6, dir, reason, lr, a, ruleset,
 		    &pd, (s == NULL));
 	}
 
 	kif->pfik_bytes[1][dir == PF_OUT][action != PF_PASS] += pd.tot_len;
 	kif->pfik_packets[1][dir == PF_OUT][action != PF_PASS]++;
 
 	if (action == PF_PASS || r->action == PF_DROP) {
 		dirndx = (dir == PF_OUT);
 		r->packets[dirndx]++;
 		r->bytes[dirndx] += pd.tot_len;
 		if (a != NULL) {
 			a->packets[dirndx]++;
 			a->bytes[dirndx] += pd.tot_len;
 		}
 		if (s != NULL) {
 			if (s->nat_rule.ptr != NULL) {
 				s->nat_rule.ptr->packets[dirndx]++;
 				s->nat_rule.ptr->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->src_node != NULL) {
 				s->src_node->packets[dirndx]++;
 				s->src_node->bytes[dirndx] += pd.tot_len;
 			}
 			if (s->nat_src_node != NULL) {
 				s->nat_src_node->packets[dirndx]++;
 				s->nat_src_node->bytes[dirndx] += pd.tot_len;
 			}
 			dirndx = (dir == s->direction) ? 0 : 1;
 			s->packets[dirndx]++;
 			s->bytes[dirndx] += pd.tot_len;
 		}
 		tr = r;
 		nr = (s != NULL) ? s->nat_rule.ptr : pd.nat_rule;
 		if (nr != NULL && r == &V_pf_default_rule)
 			tr = nr;
 		if (tr->src.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->src.addr.p.tbl,
 			    (s == NULL) ? pd.src :
 			    &s->key[(s->direction == PF_IN)]->addr[0],
 			    pd.af, pd.tot_len, dir == PF_OUT,
 			    r->action == PF_PASS, tr->src.neg);
 		if (tr->dst.addr.type == PF_ADDR_TABLE)
 			pfr_update_stats(tr->dst.addr.p.tbl,
 			    (s == NULL) ? pd.dst :
 			    &s->key[(s->direction == PF_IN)]->addr[1],
 			    pd.af, pd.tot_len, dir == PF_OUT,
 			    r->action == PF_PASS, tr->dst.neg);
 	}
 
 	switch (action) {
 	case PF_SYNPROXY_DROP:
 		m_freem(*m0);
 	case PF_DEFER:
 		*m0 = NULL;
 		action = PF_PASS;
 		break;
 	case PF_DROP:
 		m_freem(*m0);
 		*m0 = NULL;
 		break;
 	default:
 		/* pf_route6() returns unlocked. */
 		if (r->rt) {
 			pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd);
 			return (action);
 		}
 		break;
 	}
 
 	if (s)
 		PF_STATE_UNLOCK(s);
 
 	/* If reassembled packet passed, create new fragments. */
 	if (action == PF_PASS && *m0 && fwdir == PF_FWD &&
 	    (mtag = m_tag_find(m, PF_REASSEMBLED, NULL)) != NULL)
 		action = pf_refragment6(ifp, m0, mtag);
 
 	return (action);
 }
 #endif /* INET6 */
Index: projects/release-pkg/sys/ofed/include/rdma/Kbuild
===================================================================
--- projects/release-pkg/sys/ofed/include/rdma/Kbuild	(revision 293335)
+++ projects/release-pkg/sys/ofed/include/rdma/Kbuild	(nonexistent)
@@ -1 +0,0 @@
-header-y += ib_user_mad.h

Property changes on: projects/release-pkg/sys/ofed/include/rdma/Kbuild
___________________________________________________________________
Deleted: fbsd:nokeywords
## -1 +0,0 ##
-true
\ No newline at end of property
Index: projects/release-pkg/sys
===================================================================
--- projects/release-pkg/sys	(revision 293335)
+++ projects/release-pkg/sys	(revision 293336)

Property changes on: projects/release-pkg/sys
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/sys:r293225-293335
Index: projects/release-pkg/targets/pseudo/userland/lib/Makefile.depend
===================================================================
--- projects/release-pkg/targets/pseudo/userland/lib/Makefile.depend	(revision 293335)
+++ projects/release-pkg/targets/pseudo/userland/lib/Makefile.depend	(revision 293336)
@@ -1,187 +1,188 @@
 # $FreeBSD$
 
 # This file is not autogenerated - take care!
 
 .if !defined(MK_CLANG)
 .include <src.opts.mk>
 .endif
 
 DIRDEPS = \
 	lib/${CSU_DIR} \
 	lib/atf/libatf-c \
 	lib/atf/libatf-c++ \
 	lib/lib80211 \
 	lib/libalias/libalias \
 	lib/libalias/modules/cuseeme \
 	lib/libalias/modules/dummy \
 	lib/libalias/modules/ftp \
 	lib/libalias/modules/irc \
 	lib/libalias/modules/nbt \
 	lib/libalias/modules/pptp \
 	lib/libalias/modules/skinny \
 	lib/libalias/modules/smedia \
 	lib/libarchive \
 	lib/libauditd \
 	lib/libbegemot \
 	lib/libblocksruntime \
 	lib/libbluetooth \
 	lib/libbsdstat \
 	lib/libbsm \
 	lib/libbsnmp/libbsnmp \
 	lib/libbz2 \
 	lib/libc \
 	lib/libc++ \
 	lib/libcalendar \
 	lib/libcam \
 	lib/libcasper \
 	lib/libcom_err/doc \
 	lib/libcompat \
 	lib/libcompiler_rt \
 	lib/libcrypt \
 	lib/libcuse \
 	lib/libcxxrt \
 	lib/libdevctl \
 	lib/libdevinfo \
 	lib/libdevstat \
 	lib/libdwarf \
 	lib/libedit/edit/readline \
 	lib/libelf \
 	lib/libevent \
 	lib/libexecinfo \
 	lib/libexpat \
 	lib/libfetch \
 	lib/libgeom \
 	lib/libgssapi \
 	lib/libiconv_modules/BIG5 \
 	lib/libiconv_modules/DECHanyu \
 	lib/libiconv_modules/EUC \
 	lib/libiconv_modules/EUCTW \
 	lib/libiconv_modules/GBK2K \
 	lib/libiconv_modules/HZ \
 	lib/libiconv_modules/ISO2022 \
 	lib/libiconv_modules/JOHAB \
 	lib/libiconv_modules/MSKanji \
 	lib/libiconv_modules/UES \
 	lib/libiconv_modules/UTF1632 \
 	lib/libiconv_modules/UTF7 \
 	lib/libiconv_modules/UTF8 \
 	lib/libiconv_modules/VIQR \
 	lib/libiconv_modules/ZW \
 	lib/libiconv_modules/iconv_none \
 	lib/libiconv_modules/iconv_std \
 	lib/libiconv_modules/mapper_646 \
 	lib/libiconv_modules/mapper_none \
 	lib/libiconv_modules/mapper_parallel \
 	lib/libiconv_modules/mapper_serial \
 	lib/libiconv_modules/mapper_std \
 	lib/libiconv_modules/mapper_zone \
 	lib/libipsec \
 	lib/libipx \
 	lib/libjail \
 	lib/libkiconv \
 	lib/libkvm \
 	lib/libldns \
 	lib/liblzma \
 	lib/libmagic \
 	lib/libmandoc \
 	lib/libmd \
 	lib/libmemstat \
 	lib/libmilter \
 	lib/libmp \
 	lib/libnetgraph \
 	lib/libngatm \
 	lib/libnv \
 	lib/libopie \
 	lib/libpam/libpam \
 	lib/libpam/modules/pam_chroot \
 	lib/libpam/modules/pam_deny \
 	lib/libpam/modules/pam_echo \
 	lib/libpam/modules/pam_exec \
 	lib/libpam/modules/pam_ftpusers \
 	lib/libpam/modules/pam_group \
 	lib/libpam/modules/pam_guest \
 	lib/libpam/modules/pam_krb5 \
 	lib/libpam/modules/pam_ksu \
 	lib/libpam/modules/pam_lastlog \
 	lib/libpam/modules/pam_login_access \
 	lib/libpam/modules/pam_nologin \
 	lib/libpam/modules/pam_opie \
 	lib/libpam/modules/pam_opieaccess \
 	lib/libpam/modules/pam_passwdqc \
 	lib/libpam/modules/pam_permit \
 	lib/libpam/modules/pam_radius \
 	lib/libpam/modules/pam_rhosts \
 	lib/libpam/modules/pam_rootok \
 	lib/libpam/modules/pam_securetty \
 	lib/libpam/modules/pam_self \
 	lib/libpam/modules/pam_ssh \
 	lib/libpam/modules/pam_tacplus \
 	lib/libpam/modules/pam_unix \
 	lib/libpcap \
 	lib/libpjdlog \
 	lib/libpmc \
 	lib/libproc \
 	lib/libprocstat \
 	lib/libradius \
 	lib/librpcsec_gss \
 	lib/librpcsvc \
 	lib/librt \
 	lib/librtld_db \
 	lib/libsbuf \
 	lib/libsdp \
 	lib/libsm \
 	lib/libsmb \
 	lib/libsmdb \
 	lib/libsmutil \
 	lib/libstand \
 	lib/libstdbuf \
 	lib/libstdthreads \
+	lib/libsysdecode \
 	lib/libtacplus \
 	lib/libtelnet \
 	lib/libthr \
 	lib/libthread_db \
 	lib/libufs \
 	lib/libugidfw \
 	lib/libulog \
 	lib/libunbound \
 	lib/libusb \
 	lib/libusbhid \
 	lib/libutil \
 	lib/libvgl \
 	lib/libvmmapi \
 	lib/libwrap \
 	lib/liby \
 	lib/libyaml \
 	lib/libypclnt \
 	lib/libz \
 	lib/msun \
 	lib/ncurses/form \
 	lib/ncurses/formw \
 	lib/ncurses/menu \
 	lib/ncurses/menuw \
 	lib/ncurses/ncurses \
 	lib/ncurses/ncursesw \
 	lib/ncurses/panel \
 	lib/ncurses/panelw \
 
 .if ${MK_CLANG} != "no" && \
     (${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \
     (${MACHINE_CPUARCH} == "arm" && ${MACHINE_ARCH} != "armeb") || \
     (${MACHINE_CPUARCH} == "i386"))
 DIRDEPS+= \
 	lib/libclang_rt/asan-preinit \
 	lib/libclang_rt/asan \
 	lib/libclang_rt/asan_cxx \
 	lib/libclang_rt/include \
 	lib/libclang_rt/profile \
 	lib/libclang_rt/safestack \
 	lib/libclang_rt/ubsan_standalone \
 	lib/libclang_rt/ubsan_standalone_cxx \
 
 .endif
 
 .if ${MK_NAND} != "no"
 DIRDEPS+= lib/libnandfs
 .endif
 
 .include <dirdeps.mk>
Index: projects/release-pkg/targets
===================================================================
--- projects/release-pkg/targets	(revision 293335)
+++ projects/release-pkg/targets	(revision 293336)

Property changes on: projects/release-pkg/targets
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head/targets:r289091-289384,293171-293335
Index: projects/release-pkg/usr.bin/cap_mkdb/cap_mkdb.c
===================================================================
--- projects/release-pkg/usr.bin/cap_mkdb/cap_mkdb.c	(revision 293335)
+++ projects/release-pkg/usr.bin/cap_mkdb/cap_mkdb.c	(revision 293336)
@@ -1,268 +1,268 @@
 /*-
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 static const char copyright[] =
 "@(#) Copyright (c) 1992, 1993\n\
 	The Regents of the University of California.  All rights reserved.\n";
 #endif
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)cap_mkdb.c	8.2 (Berkeley) 4/27/95";
 #endif
 #endif
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/stat.h>
 
 #include <db.h>
 #include <err.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 
 /* File-local helpers, defined below. */
 static void	 db_build(char **);
 static void	 dounlink(void);
 static void	 usage(void);
 
 /* Handle of the output database; opened with dbopen() in main(). */
 static DB	*capdbp;
 /* Non-zero when -v was given; db_build() then prints a record count. */
 static int	 verbose;
 /*
  * capname: the -f argument while options are parsed, then the name of the
  * database file being built; set back to NULL once the database has been
  * closed so that dounlink() does not remove it.
  * buf: scratch space in which main() composes the "<name>.db" file name.
  */
 static char	*capname, buf[8 * 1024];
 
 /*
  * Hash parameters handed to dbopen().  Only bsize is set here; lorder is
  * filled in by main() from the -b/-l options.
  */
 static HASHINFO openinfo = {
 	4096,		/* bsize */
 	0,		/* ffactor */
 	0,		/* nelem */
 	0,		/* cachesize */
 	NULL,		/* hash() */
 	0		/* lorder */
 };
 
 /*
  * Mkcapdb creates a capability hash database for quick retrieval of capability
  * records.  The database contains 2 types of entries: records and references
  * marked by the first byte in the data.  A record entry contains the actual
  * capability record whereas a reference contains the name (key) under which
  * the correct record is stored.
  *
  * Options: -b or -l select the byte order passed to dbopen() (they are
  * mutually exclusive), -f names the output file (".db" is appended) and
  * -v enables the record count message.  At least one input file is required.
  */
 int
 main(int argc, char *argv[])
 {
 	int byteorder, c;
 
 	capname = NULL;
 	byteorder = 0;
 	while ((c = getopt(argc, argv, "bf:lv")) != -1) {
 		switch(c) {
 		case 'b':
 		case 'l':
 			/* A second -b/-l is rejected, whichever one came first. */
 			if (byteorder != 0)
 				usage();
 			byteorder = c == 'b' ? 4321 : 1234;
 			break;
 		case 'f':
 			capname = optarg;
 			break;
 		case 'v':
 			verbose = 1;
 			break;
 		case '?':
 		default:
 			usage();
 		}
 	}
 	argc -= optind;
 	argv += optind;
 
 	/* At least one input file must remain after the options. */
 	if (*argv == NULL)
 		usage();
 
 	/* Set byte order. */
 	openinfo.lorder = byteorder;
 
 	/*
 	 * The database file is named after the first input file if no name
 	 * was given with -f.  Once it has been opened, arrange for it to be
 	 * unlinked again should we exit before it is complete.
 	 */
 	(void)snprintf(buf, sizeof(buf), "%s.db", capname ? capname : *argv);
 	if ((capname = strdup(buf)) == NULL)
 		errx(1, "strdup failed");
-	if ((capdbp = dbopen(capname, O_CREAT | O_TRUNC | O_RDWR,
+	if ((capdbp = dbopen(capname, O_CREAT | O_TRUNC | O_RDWR | O_SYNC,
 	    DEFFILEMODE, DB_HASH, &openinfo)) == NULL)
 		err(1, "%s", buf);
 
 	if (atexit(dounlink))
 		err(1, "atexit");
 
 	db_build(argv);
 
 	if (capdbp->close(capdbp) < 0)
 		err(1, "%s", capname);
 	/* The database is complete: keep dounlink() from removing it. */
 	capname = NULL;
 	exit(0);
 }
 
 /*
  * Exit handler registered by main(): remove the database file named by
  * capname.  Nothing is removed when capname is NULL, which is how main()
  * marks a database that was closed successfully.
  */
 static void
 dounlink(void)
 {
 	if (capname == NULL)
 		return;
 	(void)unlink(capname);
 }
 
 /*
  * Status values stored by db_build() in the first byte of every database
  * entry:
  *	RECOK	the entry is a capability record as returned by cgetnext()
  *	TCERR	the entry is a record that was not tc expanded
  *	SHADOW	the entry is a reference; its data is the key under which
  *		the actual record is stored
  *
  * Any changes to these definitions should be made also in the getcap(3)
  * library routines.
  */
 #define RECOK	(char)0
 #define TCERR	(char)1
 #define SHADOW	(char)2
 
 /*
  * Db_build() reads every capability record from the input files in ifiles
  * with cgetnext() and stores it in the database open on capdbp.
  *
  * Each record is stored once under its complete name field (everything
  * before the first ':'), preceded by a status byte: RECOK when cgetnext()
  * returned 1, TCERR when it returned 2.  When the name field holds several
  * '|'-separated names, each individual name is additionally stored as a
  * SHADOW entry whose data is the complete name field, i.e. the key of the
  * real record.  Existing keys are never overwritten; duplicates are
  * reported with warnx() and skipped.
  *
  * The function exits through err()/errx() when memory cannot be allocated,
  * when a database write fails, and when cgetnext() ends with -1 or -2.
  * With -v the number of records stored is printed on standard output.
  */
 static void
 db_build(char **ifiles)
 {
 	DBT key, data;
 	recno_t reccnt;
 	size_t len, bplen;
 	int st;
 	char *bp, *p, *t;
 
 	data.data = NULL;
 	key.data = NULL;
 	for (reccnt = 0, bplen = 0; (st = cgetnext(&bp, ifiles)) > 0;) {
 
 		/*
 		 * Allocate enough memory to store record, terminating
 		 * NUL and one extra byte.  The buffer only ever grows and
 		 * is reused for every record.
 		 */
 		len = strlen(bp);
 		if (bplen <= len + 2) {
 			bplen += MAX(256, len + 2);
 			if ((data.data = realloc(data.data, bplen)) == NULL)
 				errx(1, "malloc failed");
 		}
 
 		/* Find the end of the name field. */
 		if ((p = strchr(bp, ':')) == NULL) {
 			warnx("no name field: %.*s", (int)MIN(len, 20), bp);
 			continue;
 		}
 
 		/* First byte of stored record indicates status. */
 		switch(st) {
 		case 1:
 			((char *)(data.data))[0] = RECOK;
 			break;
 		case 2:
 			((char *)(data.data))[0] = TCERR;
 			warnx("record not tc expanded: %.*s", (int)(p - bp),
 			    bp);
 			break;
 		}
 
 		/* Create the stored record: status byte, text, NUL. */
 		memmove(&((u_char *)(data.data))[1], bp, len + 1);
 		data.size = len + 2;
 
 		/* Store the record under the name field. */
 		key.data = bp;
 		key.size = p - bp;
 
 		switch(capdbp->put(capdbp, &key, &data, R_NOOVERWRITE)) {
 		case -1:
 			err(1, "put");
 			/* NOTREACHED */
 		case 1:
 			warnx("ignored duplicate: %.*s",
 			    (int)key.size, (char *)key.data);
 			continue;
 		}
 		++reccnt;
 
 		/*
 		 * If only one name, ignore the rest.  The ':' is replaced by
 		 * a NUL for the duration of the search so that strchr() only
 		 * looks at the name field.
 		 */
 		*p = '\0';
 		if (strchr(bp, '|') == NULL)
 			continue;
 		*p = ':';
 
 		/* The rest of the names reference the entire name. */
 		((char *)(data.data))[0] = SHADOW;
 		memmove(&((u_char *)(data.data))[1], key.data, key.size);
 		data.size = key.size + 1;
 
 		/*
 		 * Store references for other names.  t marks the start of
 		 * the current name and p scans for its terminating '|' or
 		 * ':'; empty names (p == t) are not stored.
 		 */
 		for (p = t = bp;; ++p) {
 			if (p > t && (*p == ':' || *p == '|')) {
 				key.size = p - t;
 				key.data = t;
 				switch(capdbp->put(capdbp,
 				    &key, &data, R_NOOVERWRITE)) {
 				case -1:
 					err(1, "put");
 					/* NOTREACHED */
 				case 1:
 					warnx("ignored duplicate: %.*s",
 					    (int)key.size, (char *)key.data);
 				}
 				t = p + 1;
 			}
 			if (*p == ':')
 				break;
 		}
 	}
 
 	/* The loop ended with st <= 0; negative values are errors. */
 	switch(st) {
 	case -1:
 		err(1, "file argument");
 		/* NOTREACHED */
 	case -2:
 		errx(1, "potential reference loop detected");
 		/* NOTREACHED */
 	}
 
 	/* recno_t is unsigned, so print it with an unsigned conversion. */
 	if (verbose)
 		(void)printf("cap_mkdb: %u capability records\n",
 		    (unsigned int)reccnt);
 }
 
 /*
  * Write the command synopsis to standard error and exit with status 1.
  */
 static void
 usage(void)
 {
 	static const char synopsis[] =
 	    "usage: cap_mkdb [-b | -l] [-v] [-f outfile] file ...\n";
 
 	(void)fputs(synopsis, stderr);
 	exit(1);
 }
Index: projects/release-pkg/usr.bin/truss/Makefile.depend.amd64
===================================================================
--- projects/release-pkg/usr.bin/truss/Makefile.depend.amd64	(revision 293335)
+++ projects/release-pkg/usr.bin/truss/Makefile.depend.amd64	(revision 293336)
@@ -1,31 +1,28 @@
 # $FreeBSD$
 # Autogenerated - do NOT edit!
 
 DIRDEPS = \
 	gnu/lib/csu \
 	gnu/lib/libgcc \
 	include \
 	include/arpa \
-	include/rpc \
 	include/xlocale \
 	lib/${CSU_DIR} \
 	lib/libc \
 	lib/libcompiler_rt \
 	lib/libsysdecode \
 
 
 .include <dirdeps.mk>
 
 .if ${DEP_RELDIR} == ${_DEP_RELDIR}
 # local dependencies - needed for -jN in clean tree
 amd64-cloudabi64.o: cloudabi64_syscalls.h
 amd64-cloudabi64.po: cloudabi64_syscalls.h
 amd64-freebsd.o: freebsd_syscalls.h
 amd64-freebsd.po: freebsd_syscalls.h
 amd64-freebsd32.o: freebsd32_syscalls.h
 amd64-freebsd32.po: freebsd32_syscalls.h
 amd64-linux32.o: amd64-linux32_syscalls.h
 amd64-linux32.po: amd64-linux32_syscalls.h
-ioctl.o: ioctl.c
-ioctl.po: ioctl.c
 .endif
Index: projects/release-pkg/usr.sbin/mountd/exports.5
===================================================================
--- projects/release-pkg/usr.sbin/mountd/exports.5	(revision 293335)
+++ projects/release-pkg/usr.sbin/mountd/exports.5	(revision 293336)
@@ -1,513 +1,515 @@
 .\" Copyright (c) 1989, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 4. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"     @(#)exports.5	8.3 (Berkeley) 3/29/95
 .\" $FreeBSD$
 .\"
 .Dd August 14, 2014
 .Dt EXPORTS 5
 .Os
 .Sh NAME
 .Nm exports
 .Nd define remote mount points for
 .Tn NFS
 mount requests
 .Sh SYNOPSIS
 .Nm
 .Sh DESCRIPTION
 The
 .Nm
 file specifies remote mount points for the
 .Tn NFS
 mount protocol per the
 .Tn NFS
 server specification; see
 .%T "Network File System Protocol Specification" ,
 RFC1094, Appendix A and
 .%T "NFS: Network File System Version 3 Specification" ,
 Appendix I.
 .Pp
 Each line in the file
 (other than comment lines that begin with a #)
 specifies the mount point(s) and export flags within one local server
 file system or the NFSv4 tree root for one or more hosts.
 A long line may be split over several lines by ending all but the
 last line with a backslash
 .Pq Ql \e .
 A host may be specified only once for each local file system or the NFSv4 tree root on the
 server and there may be only one default entry for each server
 file system that applies to all other hosts.
 The latter exports the file system to the
 .Dq world
 and should
 be used only when the file system contains public information.
 .Pp
 In a mount entry,
 the first field(s) specify the directory path(s) within a server file system
 that can be mounted on by the corresponding client(s).
 There are three forms of this specification.
 The first is to list all mount points as absolute
 directory paths separated by whitespace.
 This list of directory paths should be considered an
 .Dq administrative control ,
 since it is only enforced by the
 .Xr mountd 8
 daemon and not the kernel.
 As such, it only applies to NFSv2 and NFSv3 mounts and only
 with respect to the client's use of the mount protocol.
 The second is to specify the pathname of the root of the file system
 followed by the
 .Fl alldirs
 flag;
 this form allows the host(s) to mount at any point within the file system,
 including regular files if the
 .Fl r
 option is used on
 .Xr mountd 8 .
 Because NFSv4 does not use the mount protocol,
 the
 .Dq administrative controls
 are not applied and all directories within this server
 file system are mountable via NFSv4 even if the
 .Fl alldirs
 flag has not been specified.
 The third form has the string ``V4:'' followed by a single absolute path
 name, to specify the NFSv4 tree root.
 This line does not export any file system, but simply marks where the root
 of the server's directory tree is for NFSv4 clients.
 The exported file systems for NFSv4 are specified via the other lines
 in the
 .Nm
 file in the same way as for NFSv2 and NFSv3.
 The pathnames must not have any symbolic links in them and should not have
 any
 .Dq Pa \&.
 or
 .Dq Pa ..
 components.
 Mount points for a file system may appear on multiple lines each with
 different sets of hosts and export options.
 .Pp
 The second component of a line specifies how the file system is to be
 exported to the host set.
 The option flags specify whether the file system
 is exported read-only or read-write and how the client UID is mapped to
 user credentials on the server.
 For the NFSv4 tree root, the only option that can be specified in this
 section is
 .Fl sec .
 .Pp
 Export options are specified as follows:
 .Pp
 .Sm off
 .Fl maproot Li = Sy user
 .Sm on
 The credential of the specified user is used for remote access by root.
 The credential includes all the groups of which the user is a member
 on the local machine (see
 .Xr id 1 ) .
 The user may be specified by name or number.
+The user string may be quoted, or use backslash escaping.
 .Pp
 .Sm off
 .Fl maproot Li = Sy user:group1:group2:...
 .Sm on
 The colon separated list is used to specify the precise credential
 to be used for remote access by root.
 The elements of the list may be either names or numbers.
 Note that user: should be used to distinguish a credential containing
 no groups from a complete credential for that user.
+The group names may be quoted, or use backslash escaping.
 .Pp
 .Sm off
 .Fl mapall Li = Sy user
 .Sm on
 or
 .Sm off
 .Fl mapall Li = Sy user:group1:group2:...
 .Sm on
 specifies a mapping for all client UIDs (including root)
 using the same semantics as
 .Fl maproot .
 .Pp
 The option
 .Fl r
 is a synonym for
 .Fl maproot
 in an effort to be backward compatible with older export file formats.
 .Pp
 In the absence of
 .Fl maproot
 and
 .Fl mapall
 options, remote accesses by root will result in using a credential of -2:-2.
 All other users will be mapped to their remote credential.
 If a
 .Fl maproot
 option is given,
 remote access by root will be mapped to that credential instead of -2:-2.
 If a
 .Fl mapall
 option is given,
 all users (including root) will be mapped to that credential in
 place of their own.
 .Pp
 .Sm off
 .Fl sec Li = Sy flavor1:flavor2...
 .Sm on
 specifies a colon separated list of acceptable security flavors to be
 used for remote access.
 Supported security flavors are sys, krb5, krb5i and krb5p.
 If multiple flavors are listed, they should be ordered with the most
 preferred flavor first.
 If this option is not present,
 the default security flavor list of just sys is used.
 .Pp
 The
 .Fl ro
 option specifies that the file system should be exported read-only
 (default read/write).
 The option
 .Fl o
 is a synonym for
 .Fl ro
 in an effort to be backward compatible with older export file formats.
 .Pp
 .Tn WebNFS
 exports strictly according to the spec (RFC 2054 and RFC 2055) can
 be done with the
 .Fl public
 flag.
 However, this flag in itself allows r/w access to all files in
 the file system, not requiring reserved ports and not remapping UIDs.
 It
 is only provided to conform to the spec, and should normally not be used.
 For a
 .Tn WebNFS
 export,
 use the
 .Fl webnfs
 flag, which implies
 .Fl public ,
 .Sm off
 .Fl mapall No = Sy nobody
 .Sm on
 and
 .Fl ro .
 Note that only one file system can be
 .Tn WebNFS
 exported on a server.
 .Pp
 A
 .Sm off
 .Fl index No = Pa file
 .Sm on
 option can be used to specify a file whose handle will be returned if
 a directory is looked up using the public filehandle
 .Pq Tn WebNFS .
 This is to mimic the behavior of URLs.
 If no
 .Fl index
 option is specified, a directory filehandle will be returned as usual.
 The
 .Fl index
 option only makes sense in combination with the
 .Fl public
 or
 .Fl webnfs
 flags.
 .Pp
 Specifying the
 .Fl quiet
 option will inhibit some of the syslog diagnostics for bad lines in
 .Pa /etc/exports .
 This can be useful to avoid annoying error messages for known possible
 problems (see
 .Sx EXAMPLES
 below).
 .Pp
 The third component of a line specifies the host set to which the line applies.
 The set may be specified in three ways.
 The first way is to list the host name(s) separated by white space.
 (Standard Internet
 .Dq dot
 addresses may be used in place of names.)
 The second way is to specify a
 .Dq netgroup
 as defined in the
 .Pa netgroup
 file (see
 .Xr netgroup 5 ) .
 The third way is to specify an Internet subnetwork using a network and
 network mask that is defined as the set of all hosts with addresses within
 the subnetwork.
 This latter approach requires less overhead within the
 kernel and is recommended for cases where the export line refers to a
 large number of clients within an administrative subnet.
 .Pp
 The first two cases are specified by simply listing the name(s) separated
 by whitespace.
 All names are checked to see if they are
 .Dq netgroup
 names
 first and are assumed to be hostnames otherwise.
 Using the full domain specification for a hostname can normally
 circumvent the problem of a host that has the same name as a netgroup.
 The third case is specified by the flag
 .Sm off
 .Fl network Li = Sy netname Op Li / Ar prefixlength
 .Sm on
 and optionally
 .Sm off
 .Fl mask No = Sy netmask .
 .Sm on
 The netmask may be specified either by attaching a
 .Ar prefixlength
 to the
 .Fl network
 option, or by using a separate
 .Fl mask
 option.
 If the mask is not specified, it will default to the mask for that network
 class (A, B or C; see
 .Xr inet 4 ) .
 See the
 .Sx EXAMPLES
 section below.
 .Pp
 A scoped IPv6 address must carry a scope identifier as documented in
 .Xr inet6 4 .
 For example,
 .Dq Li fe80::%re2/10
 is used to specify
 .Li fe80::/10
 on
 .Li re2
 interface.
 .Pp
 For the third form which specifies the NFSv4 tree root, the directory path
 specifies the location within the server's file system tree which is the
 root of the NFSv4 tree.
 There can only be one NFSv4 root directory per server.
 As such, all entries of this form must specify the same directory path.
 For file systems other than ZFS,
 this location can be any directory and does not
 need to be within an exported file system. If it is not in an exported
 file system, a very limited set of operations are permitted, so that an
 NFSv4 client can traverse the tree to an exported file system.
 Although parts of the NFSv4 tree can be non-exported, the entire NFSv4 tree
 must consist of local file systems capable of being exported via NFS.
 All ZFS file systems in the subtree below the NFSv4 tree root must be
 exported.
 NFSv4 does not use the mount protocol and does permit clients to cross server
 mount point boundaries, although not all clients are capable of crossing the
 mount points.
 .Pp
 The
 .Fl sec
 option on these line(s) specifies what security flavors may be used for
 NFSv4 operations that do not use file handles. Since these operations
 (SetClientID, SetClientIDConfirm, Renew, DelegPurge and ReleaseLockOwner)
 allocate/modify state in the server, it is possible to restrict some clients to
 the use of the krb5[ip] security flavors, via this option.
 See the
 .Sx EXAMPLES
 section below.
 This third form is meaningless for NFSv2 and NFSv3 and is ignored for them.
 .Pp
 The
 .Xr mountd 8
 utility can be made to re-read the
 .Nm
 file by sending it a hangup signal as follows:
 .Bd -literal -offset indent
 /etc/rc.d/mountd reload
 .Ed
 .Pp
 After sending the
 .Dv SIGHUP ,
 check the
 .Xr syslogd 8
 output to see whether
 .Xr mountd 8
 logged any parsing errors in the
 .Nm
 file.
 .Sh FILES
 .Bl -tag -width /etc/exports -compact
 .It Pa /etc/exports
 the default remote mount-point file
 .El
 .Sh EXAMPLES
 .Bd -literal -offset indent
 /usr /usr/local -maproot=0:10 friends
 /usr -maproot=daemon grumpy.cis.uoguelph.ca 131.104.48.16
 /usr -ro -mapall=nobody
 /u -maproot=bin: -network 131.104.48 -mask 255.255.255.0
 /a -network 192.168.0/24
 /a -network 3ffe:1ce1:1:fe80::/64
 /u2 -maproot=root friends
 /u2 -alldirs -network cis-net -mask cis-mask
 /cdrom -alldirs,quiet,ro -network 192.168.33.0 -mask 255.255.255.0
 /private -sec=krb5i
 /secret -sec=krb5p
 V4: /	-sec=krb5:krb5i:krb5p -network 131.104.48 -mask 255.255.255.0
 V4: /	-sec=sys:krb5:krb5i:krb5p grumpy.cis.uoguelph.ca
 .Ed
 .Pp
 Given that
 .Pa /usr , /u , /a
 and
 .Pa /u2
 are
 local file system mount points, the above example specifies the following:
 .Pp
 The file system rooted at
 .Pa /usr
 is exported to hosts
 .Em friends
 where friends is specified in the netgroup file
 with users mapped to their remote credentials and
 root mapped to UID 0 and group 10.
 It is exported read-write and the hosts in
 .Dq friends
 can mount either
 .Pa /usr
 or
 .Pa /usr/local .
 It is exported to
 .Em 131.104.48.16
 and
 .Em grumpy.cis.uoguelph.ca
 with users mapped to their remote credentials and
 root mapped to the user and groups associated with
 .Dq daemon ;
 it is exported to the rest of the world as read-only with
 all users mapped to the user and groups associated with
 .Dq nobody .
 .Pp
 The file system rooted at
 .Pa /u
 is exported to all hosts on the subnetwork
 .Em 131.104.48
 with root mapped to the UID for
 .Dq bin
 and with no group access.
 .Pp
 The file system rooted at
 .Pa /u2
 is exported to the hosts in
 .Dq friends
 with root mapped to UID and groups
 associated with
 .Dq root ;
 it is exported to all hosts on network
 .Dq cis-net
 allowing mounts at any
 directory within /u2.
 .Pp
 The file system rooted at
 .Pa /a
 is exported to the network 192.168.0.0, with a netmask of 255.255.255.0.
 However, the netmask length in the entry for
 .Pa /a
 is not specified through a
 .Fl mask
 option, but through the
 .Li / Ns Ar prefix
 notation.
 .Pp
 The file system rooted at
 .Pa /a
 is also exported to the IPv6 network
 .Li 3ffe:1ce1:1:fe80::
 address, using the upper 64 bits as the prefix.
 Note that, unlike with IPv4 network addresses, the specified network
 address must be complete, and not just contain the upper bits.
 With IPv6 addresses, the
 .Fl mask
 option must not be used.
 .Pp
 The file system rooted at
 .Pa /cdrom
 will be exported read-only to the entire network 192.168.33.0/24, including
 all its subdirectories.
 Since
 .Pa /cdrom
 is the conventional mountpoint for a CD-ROM device, this export will
 fail if no CD-ROM medium is currently mounted there since that line
 would then attempt to export a subdirectory of the root file system
 with the
 .Fl alldirs
 option which is not allowed.
 The
 .Fl quiet
 option will then suppress the error message for this condition that
 would normally be syslogged.
 As soon as an actual CD-ROM is going to be mounted,
 .Xr mount 8
 will notify
 .Xr mountd 8
 about this situation, and the
 .Pa /cdrom
 file system will be exported as intended.
 Note that without using the
 .Fl alldirs
 option, the export would always succeed.
 While there is no CD-ROM medium mounted under
 .Pa /cdrom ,
 it would export the (normally empty) directory
 .Pa /cdrom
 of the root file system instead.
 .Pp
 The file system rooted at
 .Pa /private
 will be exported using Kerberos 5 authentication and will require
 integrity protected messages for all accesses.
 The file system rooted at
 .Pa /secret
 will also be exported using Kerberos 5 authentication and all messages
 used to access it will be encrypted.
 .Pp
 For the experimental server, the NFSv4 tree is rooted at ``/'',
 and any client within the 131.104.48 subnet is permitted to perform NFSv4 state
 operations on the server, so long as valid Kerberos credentials are provided.
 The machine grumpy.cis.uoguelph.ca is permitted to perform NFSv4 state
 operations on the server using AUTH_SYS credentials, as well as Kerberos ones.
 .Sh SEE ALSO
 .Xr nfsv4 4 ,
 .Xr netgroup 5 ,
 .Xr mountd 8 ,
 .Xr nfsd 8 ,
 .Xr showmount 8
 .Sh BUGS
 The export options are tied to the local mount points in the kernel and
 must be non-contradictory for any exported subdirectory of the local
 server mount point.
 It is recommended that all exported directories within the same server
 file system be specified on adjacent lines going down the tree.
 You cannot specify a hostname that is also the name of a netgroup.
 Specifying the full domain specification for a hostname can normally
 circumvent the problem.
Index: projects/release-pkg/usr.sbin/mountd/mountd.c
===================================================================
--- projects/release-pkg/usr.sbin/mountd/mountd.c	(revision 293335)
+++ projects/release-pkg/usr.sbin/mountd/mountd.c	(revision 293336)
@@ -1,3221 +1,3290 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Herb Hasler and Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 static const char copyright[] =
 "@(#) Copyright (c) 1989, 1993\n\
 	The Regents of the University of California.  All rights reserved.\n";
 #endif /*not lint*/
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)mountd.c	8.15 (Berkeley) 5/1/95";
 #endif /*not lint*/
 #endif
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/fcntl.h>
 #include <sys/linker.h>
 #include <sys/module.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 
 #include <rpc/rpc.h>
 #include <rpc/rpc_com.h>
 #include <rpc/pmap_clnt.h>
 #include <rpc/pmap_prot.h>
 #include <rpcsvc/mount.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfssvc.h>
 #include <nfsserver/nfs.h>
 
 #include <fs/nfs/nfsport.h>
 
 #include <arpa/inet.h>
 
 #include <ctype.h>
 #include <err.h>
 #include <errno.h>
 #include <grp.h>
 #include <libutil.h>
 #include <limits.h>
 #include <netdb.h>
 #include <pwd.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include "pathnames.h"
 #include "mntopts.h"
 
 #ifdef DEBUG
 #include <stdarg.h>
 #endif
 
 /*
  * Structures for keeping the mount list and export list
  */
 struct mountlist {
 	struct mountlist *ml_next;
 	char	ml_host[MNTNAMLEN+1];
 	char	ml_dirp[MNTPATHLEN+1];
 };
 
 struct dirlist {
 	struct dirlist	*dp_left;
 	struct dirlist	*dp_right;
 	int		dp_flag;
 	struct hostlist	*dp_hosts;	/* List of hosts this dir exported to */
 	char		dp_dirp[1];	/* Actually malloc'd to size of dir */
 };
 /* dp_flag bits */
 #define	DP_DEFSET	0x1
 #define DP_HOSTSET	0x2
 
 struct exportlist {
 	struct exportlist *ex_next;
 	struct dirlist	*ex_dirl;
 	struct dirlist	*ex_defdir;
 	int		ex_flag;
 	fsid_t		ex_fs;
 	char		*ex_fsdir;
 	char		*ex_indexfile;
 	int		ex_numsecflavors;
 	int		ex_secflavors[MAXSECFLAVORS];
 	int		ex_defnumsecflavors;
 	int		ex_defsecflavors[MAXSECFLAVORS];
 };
 /* ex_flag bits */
 #define	EX_LINKED	0x1
 
 struct netmsk {
 	struct sockaddr_storage nt_net;
 	struct sockaddr_storage nt_mask;
 	char		*nt_name;
 };
 
 union grouptypes {
 	struct addrinfo *gt_addrinfo;
 	struct netmsk	gt_net;
 };
 
 struct grouplist {
 	int gr_type;
 	union grouptypes gr_ptr;
 	struct grouplist *gr_next;
 	int gr_numsecflavors;
 	int gr_secflavors[MAXSECFLAVORS];
 };
 /* Group types */
 #define	GT_NULL		0x0
 #define	GT_HOST		0x1
 #define	GT_NET		0x2
 #define	GT_DEFAULT	0x3
 #define GT_IGNORE	0x5
 
 struct hostlist {
 	int		 ht_flag;	/* Uses DP_xx bits */
 	struct grouplist *ht_grp;
 	struct hostlist	 *ht_next;
 };
 
 struct fhreturn {
 	int	fhr_flag;
 	int	fhr_vers;
 	nfsfh_t	fhr_fh;
 	int	fhr_numsecflavors;
 	int	*fhr_secflavors;
 };
 
 #define	GETPORT_MAXTRY	20	/* Max tries to get a port # */
 
 /* Global defs */
 static char	*add_expdir(struct dirlist **, char *, int);
 static void	add_dlist(struct dirlist **, struct dirlist *,
 		    struct grouplist *, int, struct exportlist *);
 static void	add_mlist(char *, char *);
 static int	check_dirpath(char *);
 static int	check_options(struct dirlist *);
 static int	checkmask(struct sockaddr *sa);
 static int	chk_host(struct dirlist *, struct sockaddr *, int *, int *,
 		    int *, int **);
+static char	*strsep_quote(char **stringp, const char *delim);
 static int	create_service(struct netconfig *nconf);
 static void	complete_service(struct netconfig *nconf, char *port_str);
 static void	clearout_service(void);
 static void	del_mlist(char *hostp, char *dirp);
 static struct dirlist	*dirp_search(struct dirlist *, char *);
 static int	do_mount(struct exportlist *, struct grouplist *, int,
 		    struct xucred *, char *, int, struct statfs *);
 static int	do_opt(char **, char **, struct exportlist *,
 		    struct grouplist *, int *, int *, struct xucred *);
 static struct exportlist	*ex_search(fsid_t *);
 static struct exportlist	*get_exp(void);
 static void	free_dir(struct dirlist *);
 static void	free_exp(struct exportlist *);
 static void	free_grp(struct grouplist *);
 static void	free_host(struct hostlist *);
 static void	get_exportlist(void);
 static int	get_host(char *, struct grouplist *, struct grouplist *);
 static struct hostlist *get_ht(void);
 static int	get_line(void);
 static void	get_mountlist(void);
 static int	get_net(char *, struct netmsk *, int);
 static void	getexp_err(struct exportlist *, struct grouplist *);
 static struct grouplist	*get_grp(void);
 static void	hang_dirp(struct dirlist *, struct grouplist *,
 				struct exportlist *, int);
 static void	huphandler(int sig);
 static int	makemask(struct sockaddr_storage *ssp, int bitlen);
 static void	mntsrv(struct svc_req *, SVCXPRT *);
 static void	nextfield(char **, char **);
 static void	out_of_mem(void);
 static void	parsecred(char *, struct xucred *);
 static int	parsesec(char *, struct exportlist *);
 static int	put_exlist(struct dirlist *, XDR *, struct dirlist *,
 		    int *, int);
 static void	*sa_rawaddr(struct sockaddr *sa, int *nbytes);
 static int	sacmp(struct sockaddr *sa1, struct sockaddr *sa2,
 		    struct sockaddr *samask);
 static int	scan_tree(struct dirlist *, struct sockaddr *);
 static void	usage(void);
 static int	xdr_dir(XDR *, char *);
 static int	xdr_explist(XDR *, caddr_t);
 static int	xdr_explist_brief(XDR *, caddr_t);
 static int	xdr_explist_common(XDR *, caddr_t, int);
 static int	xdr_fhs(XDR *, caddr_t);
 static int	xdr_mlist(XDR *, caddr_t);
 static void	terminate(int);
 
 static struct exportlist *exphead;
 static struct mountlist *mlhead;
 static struct grouplist *grphead;
 static char *exnames_default[2] = { _PATH_EXPORTS, NULL };
 static char **exnames;
 static char **hosts = NULL;
 static struct xucred def_anon = {
 	XUCRED_VERSION,
 	(uid_t)-2,
 	1,
 	{ (gid_t)-2 },
 	NULL
 };
 static int force_v2 = 0;
 static int resvport_only = 1;
 static int nhosts = 0;
 static int dir_only = 1;
 static int dolog = 0;
 static int got_sighup = 0;
 static int xcreated = 0;
 
 static char *svcport_str = NULL;
 static int mallocd_svcport = 0;
 static int *sock_fd;
 static int sock_fdcnt;
 static int sock_fdpos;
 static int suspend_nfsd = 0;
 
 static int opt_flags;
 static int have_v6 = 1;
 
 static int v4root_phase = 0;
 static char v4root_dirpath[PATH_MAX + 1];
 static int has_publicfh = 0;
 
 static struct pidfh *pfh = NULL;
 /* Bits for opt_flags above */
 #define	OP_MAPROOT	0x01
 #define	OP_MAPALL	0x02
 /* 0x4 free */
 #define	OP_MASK		0x08
 #define	OP_NET		0x10
 #define	OP_ALLDIRS	0x40
 #define	OP_HAVEMASK	0x80	/* A mask was specified or inferred. */
 #define	OP_QUIET	0x100
 #define OP_MASKLEN	0x200
 #define OP_SEC		0x400
 
 #ifdef DEBUG
 static int debug = 1;
 static void	SYSLOG(int, const char *, ...) __printflike(2, 3);
 #define syslog SYSLOG
 #else
 static int debug = 0;
 #endif
 
 /*
+ * Similar to strsep(), but it allows for quoted strings
+ * and escaped characters.
+ *
+ * Returns the next token, de-quoted in place (or NULL if stringp or
+ * *stringp is NULL).  *stringp is advanced past the delimiter that
+ * ended the token, or set to NULL once the input is used up.
+ * delim must not be NULL.
+ */
+static char *
+strsep_quote(char **stringp, const char *delim)
+{
+	char *srcptr, *dstptr, *retval;
+	char quot = 0;
+
+	if (stringp == NULL || *stringp == NULL)
+		return (NULL);
+
+	srcptr = dstptr = retval = *stringp;
+
+	while (*srcptr) {
+		/*
+		 * The token is compacted in place: srcptr reads, dstptr
+		 * writes, and dstptr falls behind srcptr once a quote or
+		 * backslash has been dropped.  Cases, in order:
+		 * First:  a backslash makes the next character literal,
+		 * with no check for delim, quote, or backslash.
+		 * Second:  outside a quote, ' or " opens a quoted part;
+		 * inside one, only the same quote character closes it.
+		 * Third:  outside a quote, a delim character ends the token.
+		 * Otherwise:  the character is copied as-is.
+		 */
+		if (*srcptr == '\\') {
+			srcptr++;
+			/*
+			 * A trailing backslash has nothing to escape:
+			 * copy nothing and let the loop stop on the NUL.
+			 */
+			if (*srcptr) {
+				*dstptr++ = *srcptr++;
+			}
+			continue;
+		}
+		if (quot == 0 && (*srcptr == '\'' || *srcptr == '"')) {
+			quot = *srcptr++;
+			continue;
+		}
+		if (quot && *srcptr == quot) {
+			/* End of the quoted part */
+			quot = 0;
+			srcptr++;
+			continue;
+		}
+		if (!quot && strchr(delim, *srcptr))
+			break;
+		*dstptr++ = *srcptr++;
+	}
+
+	/*
+	 * Pick the continuation point before terminating the token: when
+	 * dstptr == srcptr the NUL overwrites the delimiter itself.
+	 */
+	*stringp = (*srcptr == '\0') ? NULL : srcptr + 1;
+	*dstptr = 0;
+	return (retval);
+}
+
+/*
  * Mountd server for NFS mount protocol as described in:
  * NFS: Network File System Protocol Specification, RFC1094, Appendix A
  * The optional arguments are the exports file name
  * default: _PATH_EXPORTS
  * and "-n" to allow nonroot mount.
  */
 int
 main(int argc, char **argv)
 {
 	fd_set readfds;
 	struct netconfig *nconf;
 	char *endptr, **hosts_bak;
 	void *nc_handle;
 	pid_t otherpid;
 	in_port_t svcport;
 	int c, k, s;
 	int maxrec = RPC_MAXDATASIZE;
 	int attempt_cnt, port_len, port_pos, ret;
 	char **port_list;
 
 	/* Check that another mountd isn't already running. */
 	pfh = pidfile_open(_PATH_MOUNTDPID, 0600, &otherpid);
 	if (pfh == NULL) {
 		if (errno == EEXIST)
 			errx(1, "mountd already running, pid: %d.", otherpid);
 		warn("cannot open or create pidfile");
 	}
 
 	s = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
 	if (s < 0)
 		have_v6 = 0;
 	else
 		close(s);
 
 	while ((c = getopt(argc, argv, "2deh:lnp:rS")) != -1)
 		switch (c) {
 		case '2':
 			force_v2 = 1;
 			break;
 		case 'e':
 			/* now a no-op, since this is the default */
 			break;
 		case 'n':
 			resvport_only = 0;
 			break;
 		case 'r':
 			dir_only = 0;
 			break;
 		case 'd':
 			debug = debug ? 0 : 1;
 			break;
 		case 'l':
 			dolog = 1;
 			break;
 		case 'p':
 			endptr = NULL;
 			svcport = (in_port_t)strtoul(optarg, &endptr, 10);
 			if (endptr == NULL || *endptr != '\0' ||
 			    svcport == 0 || svcport >= IPPORT_MAX)
 				usage();
 			svcport_str = strdup(optarg);
 			break;
 		case 'h':
 			++nhosts;
 			hosts_bak = hosts;
 			hosts_bak = realloc(hosts, nhosts * sizeof(char *));
 			if (hosts_bak == NULL) {
 				if (hosts != NULL) {
 					for (k = 0; k < nhosts; k++) 
 						free(hosts[k]);
 					free(hosts);
 					out_of_mem();
 				}
 			}
 			hosts = hosts_bak;
 			hosts[nhosts - 1] = strdup(optarg);
 			if (hosts[nhosts - 1] == NULL) {
 				for (k = 0; k < (nhosts - 1); k++) 
 					free(hosts[k]);
 				free(hosts);
 				out_of_mem();
 			}
 			break;
 		case 'S':
 			suspend_nfsd = 1;
 			break;
 		default:
 			usage();
 		};
 
 	if (modfind("nfsd") < 0) {
 		/* Not present in kernel, try loading it */
 		if (kldload("nfsd") < 0 || modfind("nfsd") < 0)
 			errx(1, "NFS server is not available");
 	}
 
 	argc -= optind;
 	argv += optind;
 	grphead = (struct grouplist *)NULL;
 	exphead = (struct exportlist *)NULL;
 	mlhead = (struct mountlist *)NULL;
 	if (argc > 0)
 		exnames = argv;
 	else
 		exnames = exnames_default;
 	openlog("mountd", LOG_PID, LOG_DAEMON);
 	if (debug)
 		warnx("getting export list");
 	get_exportlist();
 	if (debug)
 		warnx("getting mount list");
 	get_mountlist();
 	if (debug)
 		warnx("here we go");
 	if (debug == 0) {
 		daemon(0, 0);
 		signal(SIGINT, SIG_IGN);
 		signal(SIGQUIT, SIG_IGN);
 	}
 	signal(SIGHUP, huphandler);
 	signal(SIGTERM, terminate);
 	signal(SIGPIPE, SIG_IGN);
 
 	pidfile_write(pfh);
 
 	rpcb_unset(MOUNTPROG, MOUNTVERS, NULL);
 	rpcb_unset(MOUNTPROG, MOUNTVERS3, NULL);
 	rpc_control(RPC_SVC_CONNMAXREC_SET, &maxrec);
 
 	if (!resvport_only) {
 		if (sysctlbyname("vfs.nfsrv.nfs_privport", NULL, NULL,
 		    &resvport_only, sizeof(resvport_only)) != 0 &&
 		    errno != ENOENT) {
 			syslog(LOG_ERR, "sysctl: %m");
 			exit(1);
 		}
 	}
 
 	/*
 	 * If no hosts were specified, add a wildcard entry to bind to
 	 * INADDR_ANY. Otherwise make sure 127.0.0.1 and ::1 are added to the
 	 * list.
 	 */
 	if (nhosts == 0) {
 		hosts = malloc(sizeof(char *));
 		if (hosts == NULL)
 			out_of_mem();
 		hosts[0] = "*";
 		nhosts = 1;
 	} else {
 		hosts_bak = hosts;
 		if (have_v6) {
 			hosts_bak = realloc(hosts, (nhosts + 2) *
 			    sizeof(char *));
 			if (hosts_bak == NULL) {
 				for (k = 0; k < nhosts; k++)
 					free(hosts[k]);
 		    		free(hosts);
 		    		out_of_mem();
 			} else
 				hosts = hosts_bak;
 			nhosts += 2;
 			hosts[nhosts - 2] = "::1";
 		} else {
 			hosts_bak = realloc(hosts, (nhosts + 1) * sizeof(char *));
 			if (hosts_bak == NULL) {
 				for (k = 0; k < nhosts; k++)
 					free(hosts[k]);
 				free(hosts);
 				out_of_mem();
 			} else {
 				nhosts += 1;
 				hosts = hosts_bak;
 			}
 		}
 
 		hosts[nhosts - 1] = "127.0.0.1";
 	}
 
 	attempt_cnt = 1;
 	sock_fdcnt = 0;
 	sock_fd = NULL;
 	port_list = NULL;
 	port_len = 0;
 	nc_handle = setnetconfig();
 	while ((nconf = getnetconfig(nc_handle))) {
 		if (nconf->nc_flag & NC_VISIBLE) {
 			if (have_v6 == 0 && strcmp(nconf->nc_protofmly,
 			    "inet6") == 0) {
 				/* DO NOTHING */
 			} else {
 				ret = create_service(nconf);
 				if (ret == 1)
 					/* Ignore this call */
 					continue;
 				if (ret < 0) {
 					/*
 					 * Failed to bind port, so close off
 					 * all sockets created and try again
 					 * if the port# was dynamically
 					 * assigned via bind(2).
 					 */
 					clearout_service();
 					if (mallocd_svcport != 0 &&
 					    attempt_cnt < GETPORT_MAXTRY) {
 						free(svcport_str);
 						svcport_str = NULL;
 						mallocd_svcport = 0;
 					} else {
 						errno = EADDRINUSE;
 						syslog(LOG_ERR,
 						    "bindresvport_sa: %m");
 						exit(1);
 					}
 
 					/* Start over at the first service. */
 					free(sock_fd);
 					sock_fdcnt = 0;
 					sock_fd = NULL;
 					nc_handle = setnetconfig();
 					attempt_cnt++;
 				} else if (mallocd_svcport != 0 &&
 				    attempt_cnt == GETPORT_MAXTRY) {
 					/*
 					 * For the last attempt, allow
 					 * different port #s for each nconf
 					 * by saving the svcport_str and
 					 * setting it back to NULL.
 					 */
 					port_list = realloc(port_list,
 					    (port_len + 1) * sizeof(char *));
 					if (port_list == NULL)
 						out_of_mem();
 					port_list[port_len++] = svcport_str;
 					svcport_str = NULL;
 					mallocd_svcport = 0;
 				}
 			}
 		}
 	}
 
 	/*
 	 * Successfully bound the ports, so call complete_service() to
 	 * do the rest of the setup on the service(s).
 	 */
 	sock_fdpos = 0;
 	port_pos = 0;
 	nc_handle = setnetconfig();
 	while ((nconf = getnetconfig(nc_handle))) {
 		if (nconf->nc_flag & NC_VISIBLE) {
 			if (have_v6 == 0 && strcmp(nconf->nc_protofmly,
 			    "inet6") == 0) {
 				/* DO NOTHING */
 			} else if (port_list != NULL) {
 				if (port_pos >= port_len) {
 					syslog(LOG_ERR, "too many port#s");
 					exit(1);
 				}
 				complete_service(nconf, port_list[port_pos++]);
 			} else
 				complete_service(nconf, svcport_str);
 		}
 	}
 	endnetconfig(nc_handle);
 	free(sock_fd);
 	if (port_list != NULL) {
 		for (port_pos = 0; port_pos < port_len; port_pos++)
 			free(port_list[port_pos]);
 		free(port_list);
 	}
 
 	if (xcreated == 0) {
 		syslog(LOG_ERR, "could not create any services");
 		exit(1);
 	}
 
 	/* Expand svc_run() here so that we can call get_exportlist(). */
 	for (;;) {
 		if (got_sighup) {
 			get_exportlist();
 			got_sighup = 0;
 		}
 		readfds = svc_fdset;
 		switch (select(svc_maxfd + 1, &readfds, NULL, NULL, NULL)) {
 		case -1:
 			if (errno == EINTR)
                                 continue;
 			syslog(LOG_ERR, "mountd died: select: %m");
 			exit(1);
 		case 0:
 			continue;
 		default:
 			svc_getreqset(&readfds);
 		}
 	}
 } 
 
 /*
  * This routine creates and binds sockets on the appropriate
  * addresses. It gets called one time for each transport.
  * It returns 0 upon success, 1 for ingore the call and -1 to indicate
  * bind failed with EADDRINUSE.
  * Any file descriptors that have been created are stored in sock_fd and
  * the total count of them is maintained in sock_fdcnt.
  */
 static int
 create_service(struct netconfig *nconf)
 {
 	struct addrinfo hints, *res = NULL;
 	struct sockaddr_in *sin;
 	struct sockaddr_in6 *sin6;
 	struct __rpc_sockinfo si;
 	int aicode;
 	int fd;
 	int nhostsbak;
 	int one = 1;
 	int r;
 	u_int32_t host_addr[4];  /* IPv4 or IPv6 */
 	int mallocd_res;
 
 	if ((nconf->nc_semantics != NC_TPI_CLTS) &&
 	    (nconf->nc_semantics != NC_TPI_COTS) &&
 	    (nconf->nc_semantics != NC_TPI_COTS_ORD))
 		return (1);	/* not my type */
 
 	/*
 	 * XXX - using RPC library internal functions.
 	 */
 	if (!__rpc_nconf2sockinfo(nconf, &si)) {
 		syslog(LOG_ERR, "cannot get information for %s",
 		    nconf->nc_netid);
 		return (1);
 	}
 
 	/* Get mountd's address on this transport */
 	memset(&hints, 0, sizeof hints);
 	hints.ai_family = si.si_af;
 	hints.ai_socktype = si.si_socktype;
 	hints.ai_protocol = si.si_proto;
 
 	/*
 	 * Bind to specific IPs if asked to
 	 */
 	nhostsbak = nhosts;
 	while (nhostsbak > 0) {
 		--nhostsbak;
 		sock_fd = realloc(sock_fd, (sock_fdcnt + 1) * sizeof(int));
 		if (sock_fd == NULL)
 			out_of_mem();
 		sock_fd[sock_fdcnt++] = -1;	/* Set invalid for now. */
 		mallocd_res = 0;
 
 		hints.ai_flags = AI_PASSIVE;
 
 		/*	
 		 * XXX - using RPC library internal functions.
 		 */
 		if ((fd = __rpc_nconf2fd(nconf)) < 0) {
 			int non_fatal = 0;
 	    		if (errno == EAFNOSUPPORT &&
 			    nconf->nc_semantics != NC_TPI_CLTS) 
 				non_fatal = 1;
 				
 			syslog(non_fatal ? LOG_DEBUG : LOG_ERR, 
 			    "cannot create socket for %s", nconf->nc_netid);
 			if (non_fatal != 0)
 				continue;
 			exit(1);
 		}
 
 		switch (hints.ai_family) {
 		case AF_INET:
 			if (inet_pton(AF_INET, hosts[nhostsbak],
 			    host_addr) == 1) {
 				hints.ai_flags |= AI_NUMERICHOST;
 			} else {
 				/*
 				 * Skip if we have an AF_INET6 address.
 				 */
 				if (inet_pton(AF_INET6, hosts[nhostsbak],
 				    host_addr) == 1) {
 					close(fd);
 					continue;
 				}
 			}
 			break;
 		case AF_INET6:
 			if (inet_pton(AF_INET6, hosts[nhostsbak],
 			    host_addr) == 1) {
 				hints.ai_flags |= AI_NUMERICHOST;
 			} else {
 				/*
 				 * Skip if we have an AF_INET address.
 				 */
 				if (inet_pton(AF_INET, hosts[nhostsbak],
 				    host_addr) == 1) {
 					close(fd);
 					continue;
 				}
 			}
 
 			/*
 			 * We're doing host-based access checks here, so don't
 			 * allow v4-in-v6 to confuse things. The kernel will
 			 * disable it by default on NFS sockets too.
 			 */
 			if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one,
 			    sizeof one) < 0) {
 				syslog(LOG_ERR,
 				    "can't disable v4-in-v6 on IPv6 socket");
 				exit(1);
 			}
 			break;
 		default:
 			break;
 		}
 
 		/*
 		 * If no hosts were specified, just bind to INADDR_ANY
 		 */
 		if (strcmp("*", hosts[nhostsbak]) == 0) {
 			if (svcport_str == NULL) {
 				res = malloc(sizeof(struct addrinfo));
 				if (res == NULL) 
 					out_of_mem();
 				mallocd_res = 1;
 				res->ai_flags = hints.ai_flags;
 				res->ai_family = hints.ai_family;
 				res->ai_protocol = hints.ai_protocol;
 				switch (res->ai_family) {
 				case AF_INET:
 					sin = malloc(sizeof(struct sockaddr_in));
 					if (sin == NULL) 
 						out_of_mem();
 					sin->sin_family = AF_INET;
 					sin->sin_port = htons(0);
 					sin->sin_addr.s_addr = htonl(INADDR_ANY);
 					res->ai_addr = (struct sockaddr*) sin;
 					res->ai_addrlen = (socklen_t)
 					    sizeof(struct sockaddr_in);
 					break;
 				case AF_INET6:
 					sin6 = malloc(sizeof(struct sockaddr_in6));
 					if (sin6 == NULL)
 						out_of_mem();
 					sin6->sin6_family = AF_INET6;
 					sin6->sin6_port = htons(0);
 					sin6->sin6_addr = in6addr_any;
 					res->ai_addr = (struct sockaddr*) sin6;
 					res->ai_addrlen = (socklen_t)
 					    sizeof(struct sockaddr_in6);
 					break;
 				default:
 					syslog(LOG_ERR, "bad addr fam %d",
 					    res->ai_family);
 					exit(1);
 				}
 			} else { 
 				if ((aicode = getaddrinfo(NULL, svcport_str,
 				    &hints, &res)) != 0) {
 					syslog(LOG_ERR,
 					    "cannot get local address for %s: %s",
 					    nconf->nc_netid,
 					    gai_strerror(aicode));
 					close(fd);
 					continue;
 				}
 			}
 		} else {
 			if ((aicode = getaddrinfo(hosts[nhostsbak], svcport_str,
 			    &hints, &res)) != 0) {
 				syslog(LOG_ERR,
 				    "cannot get local address for %s: %s",
 				    nconf->nc_netid, gai_strerror(aicode));
 				close(fd);
 				continue;
 			}
 		}
 
 		/* Store the fd. */
 		sock_fd[sock_fdcnt - 1] = fd;
 
 		/* Now, attempt the bind. */
 		r = bindresvport_sa(fd, res->ai_addr);
 		if (r != 0) {
 			if (errno == EADDRINUSE && mallocd_svcport != 0) {
 				if (mallocd_res != 0) {
 					free(res->ai_addr);
 					free(res);
 				} else
 					freeaddrinfo(res);
 				return (-1);
 			}
 			syslog(LOG_ERR, "bindresvport_sa: %m");
 			exit(1);
 		}
 
 		if (svcport_str == NULL) {
 			svcport_str = malloc(NI_MAXSERV * sizeof(char));
 			if (svcport_str == NULL)
 				out_of_mem();
 			mallocd_svcport = 1;
 
 			if (getnameinfo(res->ai_addr,
 			    res->ai_addr->sa_len, NULL, NI_MAXHOST,
 			    svcport_str, NI_MAXSERV * sizeof(char),
 			    NI_NUMERICHOST | NI_NUMERICSERV))
 				errx(1, "Cannot get port number");
 		}
 		if (mallocd_res != 0) {
 			free(res->ai_addr);
 			free(res);
 		} else
 			freeaddrinfo(res);
 		res = NULL;
 	}
 	return (0);
 }
 
 /*
  * Called after all the create_service() calls have succeeded, to complete
  * the setup and registration.
  */
 static void
 complete_service(struct netconfig *nconf, char *port_str)
 {
 	struct addrinfo hints, *res = NULL;
 	struct __rpc_sockinfo si;
 	struct netbuf servaddr;
 	SVCXPRT	*transp = NULL;
 	int aicode, fd, nhostsbak;
 	int registered = 0;
 
 	if ((nconf->nc_semantics != NC_TPI_CLTS) &&
 	    (nconf->nc_semantics != NC_TPI_COTS) &&
 	    (nconf->nc_semantics != NC_TPI_COTS_ORD))
 		return;	/* not my type */
 
 	/*
 	 * XXX - using RPC library internal functions.
 	 */
 	if (!__rpc_nconf2sockinfo(nconf, &si)) {
 		syslog(LOG_ERR, "cannot get information for %s",
 		    nconf->nc_netid);
 		return;
 	}
 
 	nhostsbak = nhosts;
 	while (nhostsbak > 0) {
 		--nhostsbak;
 		if (sock_fdpos >= sock_fdcnt) {
 			/* Should never happen. */
 			syslog(LOG_ERR, "Ran out of socket fd's");
 			return;
 		}
 		fd = sock_fd[sock_fdpos++];
 		if (fd < 0)
 			continue;
 
 		if (nconf->nc_semantics != NC_TPI_CLTS)
 			listen(fd, SOMAXCONN);
 
 		if (nconf->nc_semantics == NC_TPI_CLTS )
 			transp = svc_dg_create(fd, 0, 0);
 		else 
 			transp = svc_vc_create(fd, RPC_MAXDATASIZE,
 			    RPC_MAXDATASIZE);
 
 		if (transp != (SVCXPRT *) NULL) {
 			if (!svc_reg(transp, MOUNTPROG, MOUNTVERS, mntsrv,
 			    NULL)) 
 				syslog(LOG_ERR,
 				    "can't register %s MOUNTVERS service",
 				    nconf->nc_netid);
 			if (!force_v2) {
 				if (!svc_reg(transp, MOUNTPROG, MOUNTVERS3,
 				    mntsrv, NULL)) 
 					syslog(LOG_ERR,
 					    "can't register %s MOUNTVERS3 service",
 					    nconf->nc_netid);
 			}
 		} else 
 			syslog(LOG_WARNING, "can't create %s services",
 			    nconf->nc_netid);
 
 		if (registered == 0) {
 			registered = 1;
 			memset(&hints, 0, sizeof hints);
 			hints.ai_flags = AI_PASSIVE;
 			hints.ai_family = si.si_af;
 			hints.ai_socktype = si.si_socktype;
 			hints.ai_protocol = si.si_proto;
 
 			if ((aicode = getaddrinfo(NULL, port_str, &hints,
 			    &res)) != 0) {
 				syslog(LOG_ERR, "cannot get local address: %s",
 				    gai_strerror(aicode));
 				exit(1);
 			}
 
 			servaddr.buf = malloc(res->ai_addrlen);
 			memcpy(servaddr.buf, res->ai_addr, res->ai_addrlen);
 			servaddr.len = res->ai_addrlen;
 
 			rpcb_set(MOUNTPROG, MOUNTVERS, nconf, &servaddr);
 			rpcb_set(MOUNTPROG, MOUNTVERS3, nconf, &servaddr);
 
 			xcreated++;
 			freeaddrinfo(res);
 		}
 	} /* end while */
 }
 
 /*
  * Clear out sockets after a failure to bind one of them, so that the
  * cycle of socket creation/binding can start anew.
  */
 static void
 clearout_service(void)
 {
 	int i;
 
 	for (i = 0; i < sock_fdcnt; i++) {
 		if (sock_fd[i] >= 0) {
 			shutdown(sock_fd[i], SHUT_RDWR);
 			close(sock_fd[i]);
 		}
 	}
 }
 
 static void
 usage(void)
 {
 	fprintf(stderr,
 		"usage: mountd [-2] [-d] [-e] [-l] [-n] [-p <port>] [-r] "
 		"[-S] [-h <bindip>] [export_file ...]\n");
 	exit(1);
 }
 
 /*
  * The mount rpc service
  */
 void
 mntsrv(struct svc_req *rqstp, SVCXPRT *transp)
 {
 	struct exportlist *ep;
 	struct dirlist *dp;
 	struct fhreturn fhr;
 	struct stat stb;
 	struct statfs fsb;
 	char host[NI_MAXHOST], numerichost[NI_MAXHOST];
 	int lookup_failed = 1;
 	struct sockaddr *saddr;
 	u_short sport;
 	char rpcpath[MNTPATHLEN + 1], dirpath[MAXPATHLEN];
 	int bad = 0, defset, hostset;
 	sigset_t sighup_mask;
 	int numsecflavors, *secflavorsp;
 
 	sigemptyset(&sighup_mask);
 	sigaddset(&sighup_mask, SIGHUP);
 	saddr = svc_getrpccaller(transp)->buf;
 	switch (saddr->sa_family) {
 	case AF_INET6:
 		sport = ntohs(((struct sockaddr_in6 *)saddr)->sin6_port);
 		break;
 	case AF_INET:
 		sport = ntohs(((struct sockaddr_in *)saddr)->sin_port);
 		break;
 	default:
 		syslog(LOG_ERR, "request from unknown address family");
 		return;
 	}
 	lookup_failed = getnameinfo(saddr, saddr->sa_len, host, sizeof host, 
 	    NULL, 0, 0);
 	getnameinfo(saddr, saddr->sa_len, numerichost,
 	    sizeof numerichost, NULL, 0, NI_NUMERICHOST);
 	switch (rqstp->rq_proc) {
 	case NULLPROC:
 		if (!svc_sendreply(transp, (xdrproc_t)xdr_void, NULL))
 			syslog(LOG_ERR, "can't send reply");
 		return;
 	case MOUNTPROC_MNT:
 		if (sport >= IPPORT_RESERVED && resvport_only) {
 			syslog(LOG_NOTICE,
 			    "mount request from %s from unprivileged port",
 			    numerichost);
 			svcerr_weakauth(transp);
 			return;
 		}
 		if (!svc_getargs(transp, (xdrproc_t)xdr_dir, rpcpath)) {
 			syslog(LOG_NOTICE, "undecodable mount request from %s",
 			    numerichost);
 			svcerr_decode(transp);
 			return;
 		}
 
 		/*
 		 * Get the real pathname and make sure it is a directory
 		 * or a regular file if the -r option was specified
 		 * and it exists.
 		 */
 		if (realpath(rpcpath, dirpath) == NULL ||
 		    stat(dirpath, &stb) < 0 ||
 		    (!S_ISDIR(stb.st_mode) &&
 		    (dir_only || !S_ISREG(stb.st_mode))) ||
 		    statfs(dirpath, &fsb) < 0) {
 			chdir("/");	/* Just in case realpath doesn't */
 			syslog(LOG_NOTICE,
 			    "mount request from %s for non existent path %s",
 			    numerichost, dirpath);
 			if (debug)
 				warnx("stat failed on %s", dirpath);
 			bad = ENOENT;	/* We will send error reply later */
 		}
 
 		/* Check in the exports list */
 		sigprocmask(SIG_BLOCK, &sighup_mask, NULL);
 		ep = ex_search(&fsb.f_fsid);
 		hostset = defset = 0;
 		if (ep && (chk_host(ep->ex_defdir, saddr, &defset, &hostset,
 		    &numsecflavors, &secflavorsp) ||
 		    ((dp = dirp_search(ep->ex_dirl, dirpath)) &&
 		      chk_host(dp, saddr, &defset, &hostset, &numsecflavors,
 		       &secflavorsp)) ||
 		    (defset && scan_tree(ep->ex_defdir, saddr) == 0 &&
 		     scan_tree(ep->ex_dirl, saddr) == 0))) {
 			if (bad) {
 				if (!svc_sendreply(transp, (xdrproc_t)xdr_long,
 				    (caddr_t)&bad))
 					syslog(LOG_ERR, "can't send reply");
 				sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL);
 				return;
 			}
 			if (hostset & DP_HOSTSET) {
 				fhr.fhr_flag = hostset;
 				fhr.fhr_numsecflavors = numsecflavors;
 				fhr.fhr_secflavors = secflavorsp;
 			} else {
 				fhr.fhr_flag = defset;
 				fhr.fhr_numsecflavors = ep->ex_defnumsecflavors;
 				fhr.fhr_secflavors = ep->ex_defsecflavors;
 			}
 			fhr.fhr_vers = rqstp->rq_vers;
 			/* Get the file handle */
 			memset(&fhr.fhr_fh, 0, sizeof(nfsfh_t));
 			if (getfh(dirpath, (fhandle_t *)&fhr.fhr_fh) < 0) {
 				bad = errno;
 				syslog(LOG_ERR, "can't get fh for %s", dirpath);
 				if (!svc_sendreply(transp, (xdrproc_t)xdr_long,
 				    (caddr_t)&bad))
 					syslog(LOG_ERR, "can't send reply");
 				sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL);
 				return;
 			}
 			if (!svc_sendreply(transp, (xdrproc_t)xdr_fhs,
 			    (caddr_t)&fhr))
 				syslog(LOG_ERR, "can't send reply");
 			if (!lookup_failed)
 				add_mlist(host, dirpath);
 			else
 				add_mlist(numerichost, dirpath);
 			if (debug)
 				warnx("mount successful");
 			if (dolog)
 				syslog(LOG_NOTICE,
 				    "mount request succeeded from %s for %s",
 				    numerichost, dirpath);
 		} else {
 			bad = EACCES;
 			syslog(LOG_NOTICE,
 			    "mount request denied from %s for %s",
 			    numerichost, dirpath);
 		}
 
 		if (bad && !svc_sendreply(transp, (xdrproc_t)xdr_long,
 		    (caddr_t)&bad))
 			syslog(LOG_ERR, "can't send reply");
 		sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL);
 		return;
 	case MOUNTPROC_DUMP:
 		if (!svc_sendreply(transp, (xdrproc_t)xdr_mlist, (caddr_t)NULL))
 			syslog(LOG_ERR, "can't send reply");
 		else if (dolog)
 			syslog(LOG_NOTICE,
 			    "dump request succeeded from %s",
 			    numerichost);
 		return;
 	case MOUNTPROC_UMNT:
 		if (sport >= IPPORT_RESERVED && resvport_only) {
 			syslog(LOG_NOTICE,
 			    "umount request from %s from unprivileged port",
 			    numerichost);
 			svcerr_weakauth(transp);
 			return;
 		}
 		if (!svc_getargs(transp, (xdrproc_t)xdr_dir, rpcpath)) {
 			syslog(LOG_NOTICE, "undecodable umount request from %s",
 			    numerichost);
 			svcerr_decode(transp);
 			return;
 		}
 		if (realpath(rpcpath, dirpath) == NULL) {
 			syslog(LOG_NOTICE, "umount request from %s "
 			    "for non existent path %s",
 			    numerichost, dirpath);
 		}
 		if (!svc_sendreply(transp, (xdrproc_t)xdr_void, (caddr_t)NULL))
 			syslog(LOG_ERR, "can't send reply");
 		if (!lookup_failed)
 			del_mlist(host, dirpath);
 		del_mlist(numerichost, dirpath);
 		if (dolog)
 			syslog(LOG_NOTICE,
 			    "umount request succeeded from %s for %s",
 			    numerichost, dirpath);
 		return;
 	case MOUNTPROC_UMNTALL:
 		if (sport >= IPPORT_RESERVED && resvport_only) {
 			syslog(LOG_NOTICE,
 			    "umountall request from %s from unprivileged port",
 			    numerichost);
 			svcerr_weakauth(transp);
 			return;
 		}
 		if (!svc_sendreply(transp, (xdrproc_t)xdr_void, (caddr_t)NULL))
 			syslog(LOG_ERR, "can't send reply");
 		if (!lookup_failed)
 			del_mlist(host, NULL);
 		del_mlist(numerichost, NULL);
 		if (dolog)
 			syslog(LOG_NOTICE,
 			    "umountall request succeeded from %s",
 			    numerichost);
 		return;
 	case MOUNTPROC_EXPORT:
 		if (!svc_sendreply(transp, (xdrproc_t)xdr_explist, (caddr_t)NULL))
 			if (!svc_sendreply(transp, (xdrproc_t)xdr_explist_brief,
 			    (caddr_t)NULL))
 				syslog(LOG_ERR, "can't send reply");
 		if (dolog)
 			syslog(LOG_NOTICE,
 			    "export request succeeded from %s",
 			    numerichost);
 		return;
 	default:
 		svcerr_noproc(transp);
 		return;
 	}
 }
 
 /*
  * XDR filter for a directory path string of at most MNTPATHLEN characters.
  * dirp is the caller's buffer; the xdr_string() result is returned as is.
  */
 static int
 xdr_dir(XDR *xdrsp, char *dirp)
 {
 	char *path = dirp;
 
 	return (xdr_string(xdrsp, &path, MNTPATHLEN));
 }
 
 /*
  * XDR routine that generates the file handle reply.
  *
  * cp points at a struct fhreturn.  A zero status word is sent first.
  * For fhr_vers 1 only the NFSX_V2FH byte handle follows.  For fhr_vers 3
  * the handle length (NFSX_V3FH), the handle and the security flavor list
  * follow; when no flavors are recorded a single AUTH_SYS entry is sent.
  * Returns non-zero on success, 0 on an XDR failure or any other version.
  */
 static int
 xdr_fhs(XDR *xdrsp, caddr_t cp)
 {
 	struct fhreturn *reply = (struct fhreturn *)cp;
 	u_long status = 0, count, flavor;
 	int idx;
 
 	if (!xdr_long(xdrsp, &status))
 		return (0);
 
 	if (reply->fhr_vers == 1)
 		return (xdr_opaque(xdrsp, (caddr_t)&reply->fhr_fh, NFSX_V2FH));
 	if (reply->fhr_vers != 3)
 		return (0);
 
 	count = NFSX_V3FH;
 	if (!xdr_long(xdrsp, &count))
 		return (0);
 	if (!xdr_opaque(xdrsp, (caddr_t)&reply->fhr_fh, count))
 		return (0);
 
 	if (reply->fhr_numsecflavors == 0) {
 		/* Nothing recorded: advertise exactly one flavor, AUTH_SYS. */
 		flavor = AUTH_SYS;
 		count = 1;
 		if (!xdr_long(xdrsp, &count))
 			return (0);
 		return (xdr_long(xdrsp, &flavor));
 	}
 
 	if (!xdr_int(xdrsp, &reply->fhr_numsecflavors))
 		return (0);
 	for (idx = 0; idx < reply->fhr_numsecflavors; idx++) {
 		if (!xdr_int(xdrsp, &reply->fhr_secflavors[idx]))
 			return (0);
 	}
 	return (1);
 }
 
 /*
  * XDR routine for the mount list headed by mlhead.  Every entry is sent as
  * a true boolean followed by its host name (MNTNAMLEN limit) and directory
  * path (MNTPATHLEN limit); a false boolean ends the list.
  * Returns 1 on success, 0 as soon as any XDR call fails.
  */
 static int
 xdr_mlist(XDR *xdrsp, caddr_t cp __unused)
 {
 	struct mountlist *entry;
 	int more = 1;
 	int done = 0;
 	char *str;
 
 	for (entry = mlhead; entry != NULL; entry = entry->ml_next) {
 		if (!xdr_bool(xdrsp, &more))
 			return (0);
 		str = &entry->ml_host[0];
 		if (!xdr_string(xdrsp, &str, MNTNAMLEN))
 			return (0);
 		str = &entry->ml_dirp[0];
 		if (!xdr_string(xdrsp, &str, MNTPATHLEN))
 			return (0);
 	}
 	return (xdr_bool(xdrsp, &done) ? 1 : 0);
 }
 
 /*
  * XDR conversion for the export list headed by exphead.
  *
  * SIGHUP is blocked while the list is walked and unblocked before
  * returning.  For each export the directory tree is emitted with
  * put_exlist(); the default directory is emitted separately when the tree
  * walk did not already cover it (putdef still 0).  "brief" is handed on to
  * put_exlist() unchanged.  Returns 1 on success, 0 on failure.
  */
 static int
 xdr_explist_common(XDR *xdrsp, caddr_t cp __unused, int brief)
 {
 	struct exportlist *exp;
 	sigset_t sighup_mask;
 	int end_marker = 0;
 	int failed = 0;
 	int putdef;
 
 	sigemptyset(&sighup_mask);
 	sigaddset(&sighup_mask, SIGHUP);
 	sigprocmask(SIG_BLOCK, &sighup_mask, NULL);
 
 	for (exp = exphead; exp != NULL && !failed; exp = exp->ex_next) {
 		putdef = 0;
 		if (put_exlist(exp->ex_dirl, xdrsp, exp->ex_defdir,
 		    &putdef, brief)) {
 			failed = 1;
 		} else if (exp->ex_defdir && putdef == 0 &&
 		    put_exlist(exp->ex_defdir, xdrsp, (struct dirlist *)NULL,
 		    &putdef, brief)) {
 			failed = 1;
 		}
 	}
 
 	sigprocmask(SIG_UNBLOCK, &sighup_mask, NULL);
 	if (failed)
 		return (0);
 	if (!xdr_bool(xdrsp, &end_marker))
 		return (0);
 	return (1);
 }
 
 /*
  * Helper for xdr_explist_common(): walk the directory tree rooted at dp in
  * order (left, node, right) and emit each directory path followed by its
  * host/network names.
  *
  * adp is the export's default directory, or NULL.  When a node's path
  * equals adp's path, *putdefp is set to 1 and the hosts hanging off adp are
  * emitted after the node's own hosts.  With "brief" set, the host list is
  * replaced by the single placeholder string "(...)".
  *
  * NOTE: the return sense is inverted relative to the XDR routines:
  * 0 means success and 1 means an XDR call failed.
  */
 static int
 put_exlist(struct dirlist *dp, XDR *xdrsp, struct dirlist *adp, int *putdefp,
 	int brief)
 {
 	struct grouplist *grp;
 	struct hostlist *hp;
 	int more = 1;
 	int end = 0;
 	int gotalldir = 0;
 	char *strp;
 
 	if (dp == NULL)
 		return (0);
 
 	if (put_exlist(dp->dp_left, xdrsp, adp, putdefp, brief))
 		return (1);
 
 	/* The directory path of this node. */
 	if (!xdr_bool(xdrsp, &more))
 		return (1);
 	strp = dp->dp_dirp;
 	if (!xdr_string(xdrsp, &strp, MNTPATHLEN))
 		return (1);
 	if (adp != NULL && strcmp(dp->dp_dirp, adp->dp_dirp) == 0) {
 		gotalldir = 1;
 		*putdefp = 1;
 	}
 
 	if (brief) {
 		if (!xdr_bool(xdrsp, &more))
 			return (1);
 		strp = "(...)";
 		if (!xdr_string(xdrsp, &strp, MNTPATHLEN))
 			return (1);
 	} else if ((dp->dp_flag & DP_DEFSET) == 0 &&
 	    (gotalldir == 0 || (adp->dp_flag & DP_DEFSET) == 0)) {
 		hp = dp->dp_hosts;
 		while (hp != NULL) {
 			grp = hp->ht_grp;
 			if (grp->gr_type == GT_HOST ||
 			    grp->gr_type == GT_NET) {
 				if (!xdr_bool(xdrsp, &more))
 					return (1);
 				if (grp->gr_type == GT_HOST)
 					strp = grp->gr_ptr.gt_addrinfo->ai_canonname;
 				else
 					strp = grp->gr_ptr.gt_net.nt_name;
 				if (!xdr_string(xdrsp, &strp, MNTNAMLEN))
 					return (1);
 			}
 			hp = hp->ht_next;
 			/* Own hosts exhausted: continue once with adp's. */
 			if (hp == (struct hostlist *)NULL && gotalldir) {
 				hp = adp->dp_hosts;
 				gotalldir = 0;
 			}
 		}
 	}
 
 	/* Terminate this node's name list, then do the right subtree. */
 	if (!xdr_bool(xdrsp, &end))
 		return (1);
 	return (put_exlist(dp->dp_right, xdrsp, adp, putdefp, brief) ? 1 : 0);
 }
 
 /*
  * XDR routine for the full export list: xdr_explist_common() with
  * brief == 0.
  */
 static int
 xdr_explist(XDR *xdrsp, caddr_t cp)
 {
 	const int brief = 0;
 
 	return (xdr_explist_common(xdrsp, cp, brief));
 }
 
 /*
  * XDR routine for the abbreviated export list: xdr_explist_common() with
  * brief == 1.
  */
 static int
 xdr_explist_brief(XDR *xdrsp, caddr_t cp)
 {
 	const int brief = 1;
 
 	return (xdr_explist_common(xdrsp, cp, brief));
 }
 
 /* Exports line currently being parsed; also quoted in error messages. */
 static char *line;
 /* Presumably the allocated size of "line" (maintained outside this view; TODO confirm). */
 static size_t linesize;
 /* Exports file currently open for reading; opened and closed by get_exportlist(). */
 static FILE *exp_file;
 
 /*
  * Get the export list from one, currently open file
  *
  * Each line returned by get_line() is split into fields with nextfield().
  * A field starting with '/' is a directory, a field starting with '-' is
  * an option handled by do_opt(), anything else is a host or netgroup name.
  * A line starting with "V4:" names the NFSv4 root.  Once a line has been
  * parsed, every group on it is pushed into the kernel with do_mount() and,
  * for non-V4 lines, the result is linked into the exphead/grphead lists.
  * Any error discards the line through getexp_err() and resumes at
  * "nextline".
  */
 static void
 get_exportlist_one(void)
 {
 	struct exportlist *ep, *ep2;
 	struct grouplist *grp, *tgrp;
 	struct exportlist **epp;
 	struct dirlist *dirhead;
 	struct statfs fsb;
 	struct xucred anon;
 	char *cp, *endcp, *dirp, *hst, *usr, *dom, savedc;
 	int len, has_host, exflags, got_nondir, dirplen, netgrp;
 
 	v4root_phase = 0;
 	dirhead = (struct dirlist *)NULL;
 	while (get_line()) {
 		if (debug)
 			warnx("got line %s", line);
 		cp = line;
 		nextfield(&cp, &endcp);
 		if (*cp == '#')
 			goto nextline;
 
 		/*
 		 * Set defaults.
 		 */
 		has_host = FALSE;
 		anon = def_anon;
 		exflags = MNT_EXPORTED;
 		got_nondir = 0;
 		opt_flags = 0;
 		ep = (struct exportlist *)NULL;
 		dirp = NULL;
 
 		/*
 		 * Handle the V4 root dir.
 		 */
 		if (*cp == 'V' && *(cp + 1) == '4' && *(cp + 2) == ':') {
 			/*
 			 * V4: just indicates that it is the v4 root point,
 			 * so skip over that and set v4root_phase.
 			 */
 			if (v4root_phase > 0) {
 				syslog(LOG_ERR, "V4:duplicate line, ignored");
 				goto nextline;
 			}
 			v4root_phase = 1;
 			cp += 3;
 			nextfield(&cp, &endcp);
 		}
 
 		/*
 		 * Create new exports list entry
 		 */
 		len = endcp-cp;
 		/* tgrp stays at the head of this line's group list; grp advances. */
 		tgrp = grp = get_grp();
 		/* One iteration per field; cp..endcp delimits the current field. */
 		while (len > 0) {
 			if (len > MNTNAMLEN) {
 			    getexp_err(ep, tgrp);
 			    goto nextline;
 			}
 			if (*cp == '-') {
 			    /* Options are only valid after at least one directory. */
 			    if (ep == (struct exportlist *)NULL) {
 				getexp_err(ep, tgrp);
 				goto nextline;
 			    }
 			    if (debug)
 				warnx("doing opt %s", cp);
 			    got_nondir = 1;
 			    if (do_opt(&cp, &endcp, ep, grp, &has_host,
 				&exflags, &anon)) {
 				getexp_err(ep, tgrp);
 				goto nextline;
 			    }
 			} else if (*cp == '/') {
 			    /*
 			     * Temporarily NUL-terminate the field in place;
 			     * savedc is put back on the success path below.
 			     */
 			    savedc = *endcp;
 			    *endcp = '\0';
 			    if (v4root_phase > 1) {
 				    if (dirp != NULL) {
 					syslog(LOG_ERR, "Multiple V4 dirs");
 					getexp_err(ep, tgrp);
 					goto nextline;
 				    }
 			    }
 			    if (check_dirpath(cp) &&
 				statfs(cp, &fsb) >= 0) {
 				if ((fsb.f_flags & MNT_AUTOMOUNTED) != 0)
 				    syslog(LOG_ERR, "Warning: exporting of "
 					"automounted fs %s not supported", cp);
 				if (got_nondir) {
 				    syslog(LOG_ERR, "dirs must be first");
 				    getexp_err(ep, tgrp);
 				    goto nextline;
 				}
 				if (v4root_phase == 1) {
 				    if (dirp != NULL) {
 					syslog(LOG_ERR, "Multiple V4 dirs");
 					getexp_err(ep, tgrp);
 					goto nextline;
 				    }
 				    /*
 				     * The first V4: line fixes the root path;
 				     * later ones must name the same path.
 				     */
 				    if (strlen(v4root_dirpath) == 0) {
 					strlcpy(v4root_dirpath, cp,
 					    sizeof (v4root_dirpath));
 				    } else if (strcmp(v4root_dirpath, cp)
 					!= 0) {
 					syslog(LOG_ERR,
 					    "different V4 dirpath %s", cp);
 					getexp_err(ep, tgrp);
 					goto nextline;
 				    }
 				    /*
 				     * NOTE(review): dirplen is not assigned
 				     * on this path but is still passed to
 				     * do_mount() below -- confirm it is
 				     * unused for V4 exports.
 				     */
 				    dirp = cp;
 				    v4root_phase = 2;
 				    got_nondir = 1;
 				    ep = get_exp();
 				} else {
 				    if (ep) {
 					/*
 					 * Every directory on a line must be
 					 * on the same file system.
 					 */
 					if (ep->ex_fs.val[0] !=
 					    fsb.f_fsid.val[0] ||
 					    ep->ex_fs.val[1] !=
 					    fsb.f_fsid.val[1]) {
 						getexp_err(ep, tgrp);
 						goto nextline;
 					}
 				    } else {
 					/*
 					 * See if this directory is already
 					 * in the list.
 					 */
 					ep = ex_search(&fsb.f_fsid);
 					if (ep == (struct exportlist *)NULL) {
 					    ep = get_exp();
 					    ep->ex_fs = fsb.f_fsid;
 					    ep->ex_fsdir = (char *)malloc
 					        (strlen(fsb.f_mntonname) + 1);
 					    if (ep->ex_fsdir)
 						strcpy(ep->ex_fsdir,
 						    fsb.f_mntonname);
 					    else
 						out_of_mem();
 					    if (debug)
 						warnx(
 						  "making new ep fs=0x%x,0x%x",
 						  fsb.f_fsid.val[0],
 						  fsb.f_fsid.val[1]);
 					} else if (debug)
 					    warnx("found ep fs=0x%x,0x%x",
 						fsb.f_fsid.val[0],
 						fsb.f_fsid.val[1]);
 				    }
 
 				    /*
 				     * Add dirpath to export mount point.
 				     */
 				    dirp = add_expdir(&dirhead, cp, len);
 				    dirplen = len;
 				}
 			    } else {
 				getexp_err(ep, tgrp);
 				goto nextline;
 			    }
 			    *endcp = savedc;
 			} else {
 			    savedc = *endcp;
 			    *endcp = '\0';
 			    got_nondir = 1;
 			    if (ep == (struct exportlist *)NULL) {
 				getexp_err(ep, tgrp);
 				goto nextline;
 			    }
 
 			    /*
 			     * Get the host or netgroup.
 			     */
 			    setnetgrent(cp);
 			    netgrp = getnetgrent(&hst, &usr, &dom);
 			    /*
 			     * Runs once for a plain host name, or once per
 			     * netgroup member.  Bad entries are marked
 			     * GT_IGNORE rather than failing the line.
 			     */
 			    do {
 				if (has_host) {
 				    grp->gr_next = get_grp();
 				    grp = grp->gr_next;
 				}
 				if (netgrp) {
 				    if (hst == 0) {
 					syslog(LOG_ERR,
 				"null hostname in netgroup %s, skipping", cp);
 					grp->gr_type = GT_IGNORE;
 				    } else if (get_host(hst, grp, tgrp)) {
 					syslog(LOG_ERR,
 			"bad host %s in netgroup %s, skipping", hst, cp);
 					grp->gr_type = GT_IGNORE;
 				    }
 				} else if (get_host(cp, grp, tgrp)) {
 				    syslog(LOG_ERR, "bad host %s, skipping", cp);
 				    grp->gr_type = GT_IGNORE;
 				}
 				has_host = TRUE;
 			    } while (netgrp && getnetgrent(&hst, &usr, &dom));
 			    endnetgrent();
 			    *endcp = savedc;
 			}
 			cp = endcp;
 			nextfield(&cp, &endcp);
 			len = endcp - cp;
 		}
 		if (check_options(dirhead)) {
 			getexp_err(ep, tgrp);
 			goto nextline;
 		}
 		if (!has_host) {
 			grp->gr_type = GT_DEFAULT;
 			if (debug)
 				warnx("adding a default entry");
 
 		/*
 		 * Don't allow a network export to coincide with a list of
 		 * host(s) on the same line.
 		 */
 		} else if ((opt_flags & OP_NET) && tgrp->gr_next) {
 			syslog(LOG_ERR, "network/host conflict");
 			getexp_err(ep, tgrp);
 			goto nextline;
 
 		/*
 		 * If an export list was specified on this line, make sure
 		 * that we have at least one valid entry, otherwise skip it.
 		 */
 		} else {
 			grp = tgrp;
 			while (grp && grp->gr_type == GT_IGNORE)
 				grp = grp->gr_next;
 			if (! grp) {
 			    getexp_err(ep, tgrp);
 			    goto nextline;
 			}
 		}
 
 		/* Still in phase 1: "V4:" was seen but no directory followed. */
 		if (v4root_phase == 1) {
 			syslog(LOG_ERR, "V4:root, no dirp, ignored");
 			getexp_err(ep, tgrp);
 			goto nextline;
 		}
 
 		/*
 		 * Loop through hosts, pushing the exports into the kernel.
 		 * After loop, tgrp points to the start of the list and
 		 * grp points to the last entry in the list.
 		 */
 		grp = tgrp;
 		do {
 			if (do_mount(ep, grp, exflags, &anon, dirp, dirplen,
 			    &fsb)) {
 				getexp_err(ep, tgrp);
 				goto nextline;
 			}
 		} while (grp->gr_next && (grp = grp->gr_next));
 
 		/*
 		 * For V4: don't enter in mount lists.
 		 */
 		if (v4root_phase > 0 && v4root_phase <= 2) {
 			/*
 			 * Since these structures aren't used by mountd,
 			 * free them up now.
 			 */
 			if (ep != NULL)
 				free_exp(ep);
 			while (tgrp != NULL) {
 				grp = tgrp;
 				tgrp = tgrp->gr_next;
 				free_grp(grp);
 			}
 			goto nextline;
 		}
 
 		/*
 		 * Success. Update the data structures.
 		 */
 		if (has_host) {
 			hang_dirp(dirhead, tgrp, ep, opt_flags);
 			/* Prepend this line's groups to the global group list. */
 			grp->gr_next = grphead;
 			grphead = tgrp;
 		} else {
 			hang_dirp(dirhead, (struct grouplist *)NULL, ep,
 				opt_flags);
 			free_grp(grp);
 		}
 		/* hang_dirp() consumed the directory list; don't free it below. */
 		dirhead = (struct dirlist *)NULL;
 		if ((ep->ex_flag & EX_LINKED) == 0) {
 			ep2 = exphead;
 			epp = &exphead;
 
 			/*
 			 * Insert in the list in alphabetical order.
 			 */
 			while (ep2 && strcmp(ep2->ex_fsdir, ep->ex_fsdir) < 0) {
 				epp = &ep2->ex_next;
 				ep2 = ep2->ex_next;
 			}
 			if (ep2)
 				ep->ex_next = ep2;
 			*epp = ep;
 			ep->ex_flag |= EX_LINKED;
 		}
 nextline:
 		/* Common per-line cleanup for both the success and error paths. */
 		v4root_phase = 0;
 		if (dirhead) {
 			free_dir(dirhead);
 			dirhead = (struct dirlist *)NULL;
 		}
 	}
 }
 
 /*
  * Get the export list from all specified files.
  *
  * Steps, in order: optionally suspend the nfsd, free the old in-memory
  * export and group lists, delete the old V4 root export, delete the
  * kernel exports of every exported non-network file system, re-read all
  * files named in exnames[], clear the public file handle if none was set,
  * and resume the nfsd.  Exits with status 2 if no exports file could be
  * opened.
  */
 static void
 get_exportlist(void)
 {
 	struct exportlist *ep, *ep_next;
 	struct grouplist *grp, *grp_next;
 	struct export_args export;
 	struct nfsex_args eargs;
 	struct iovec *iov;
 	struct statfs *fsp, *mntbufp;
 	struct xvfsconf vfc;
 	char errmsg[255];
 	int num, i;
 	int iovlen;
 	int opened;
 
 	if (suspend_nfsd != 0)
 		(void)nfssvc(NFSSVC_SUSPENDNFSD, NULL);
 	v4root_dirpath[0] = '\0';
 	bzero(&export, sizeof(export));
 	export.ex_flags = MNT_DELEXPORT;
 	iov = NULL;
 	iovlen = 0;
 	bzero(errmsg, sizeof(errmsg));
 
 	/*
 	 * First, get rid of the old lists.
 	 */
 	for (ep = exphead; ep != NULL; ep = ep_next) {
 		ep_next = ep->ex_next;
 		free_exp(ep);
 	}
 	exphead = (struct exportlist *)NULL;
 
 	for (grp = grphead; grp != NULL; grp = grp_next) {
 		grp_next = grp->gr_next;
 		free_grp(grp);
 	}
 	grphead = (struct grouplist *)NULL;
 
 	/*
 	 * Then the old V4 root dir; a missing one (ENOENT) is not an error.
 	 */
 	bzero(&eargs, sizeof (eargs));
 	eargs.export.ex_flags = MNT_DELEXPORT;
 	if (nfssvc(NFSSVC_V4ROOTEXPORT, (caddr_t)&eargs) < 0 &&
 	    errno != ENOENT)
 		syslog(LOG_ERR, "Can't delete exports for V4:");
 
 	/*
 	 * Forget whether a public fh has been exported.
 	 */
 	has_publicfh = 0;
 
 	/*
 	 * Delete the exports that are in the kernel for all local
 	 * file systems.
 	 * XXX: Should know how to handle all local exportable filesystems.
 	 */
 	num = getmntinfo(&mntbufp, MNT_NOWAIT);
 
 	if (num > 0) {
 		/* Odd slots (the values) are filled in per file system below. */
 		build_iovec(&iov, &iovlen, "fstype", NULL, 0);
 		build_iovec(&iov, &iovlen, "fspath", NULL, 0);
 		build_iovec(&iov, &iovlen, "from", NULL, 0);
 		build_iovec(&iov, &iovlen, "update", NULL, 0);
 		build_iovec(&iov, &iovlen, "export", &export, sizeof(export));
 		build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg));
 	}
 
 	for (i = 0; i < num; i++) {
 		fsp = &mntbufp[i];
 		if (getvfsbyname(fsp->f_fstypename, &vfc) != 0) {
 			syslog(LOG_ERR, "getvfsbyname() failed for %s",
 			    fsp->f_fstypename);
 			continue;
 		}
 
 		/*
 		 * Skip file systems that are not exported, and network
 		 * file systems: passing "export" to nmount() only makes
 		 * sense for local ones.
 		 */
 		if ((fsp->f_flags & MNT_EXPORTED) == 0 ||
 		    (vfc.vfc_flags & VFCF_NETWORK) != 0)
 			continue;
 
 		iov[1].iov_base = fsp->f_fstypename;
 		iov[1].iov_len = strlen(fsp->f_fstypename) + 1;
 		iov[3].iov_base = fsp->f_mntonname;
 		iov[3].iov_len = strlen(fsp->f_mntonname) + 1;
 		iov[5].iov_base = fsp->f_mntfromname;
 		iov[5].iov_len = strlen(fsp->f_mntfromname) + 1;
 		errmsg[0] = '\0';
 
 		if (nmount(iov, iovlen, fsp->f_flags) >= 0)
 			continue;
 
 		/*
 		 * EXDEV is returned when the path exists but is not a
 		 * mount point, which may happen when racing with an
 		 * unmount.  ENOENT and ENOTSUP are ignored as well.
 		 */
 		if (errno == ENOENT || errno == ENOTSUP || errno == EXDEV)
 			continue;
 		syslog(LOG_ERR,
 		    "can't delete exports for %s: %m %s",
 		    fsp->f_mntonname, errmsg);
 	}
 
 	if (iov != NULL) {
 		/*
 		 * Free the six name strings in the even slots (fstype,
 		 * fspath, from, update, export, errmsg); the original
 		 * comment says they are strdup()ed in getmntopts.c.
 		 */
 		for (i = 0; i <= 10; i += 2)
 			free(iov[i].iov_base);
 
 		/* free iov, allocated by realloc() */
 		free(iov);
 		iovlen = 0;
 	}
 
 	/*
 	 * Read in the exports files and build the list, calling
 	 * nmount() as we go along to push the export rules into the kernel.
 	 */
 	opened = 0;
 	for (i = 0; exnames[i] != NULL; i++) {
 		if (debug)
 			warnx("reading exports from %s", exnames[i]);
 		exp_file = fopen(exnames[i], "r");
 		if (exp_file == NULL) {
 			syslog(LOG_WARNING, "can't open %s", exnames[i]);
 			continue;
 		}
 		get_exportlist_one();
 		fclose(exp_file);
 		opened++;
 	}
 	if (opened == 0) {
 		syslog(LOG_ERR, "can't open any exports file");
 		exit(2);
 	}
 
 	/*
 	 * If there was no public fh, clear any previous one set.
 	 */
 	if (has_publicfh == 0)
 		(void) nfssvc(NFSSVC_NOPUBLICFH, NULL);
 
 	/* Resume the nfsd. If they weren't suspended, this is harmless. */
 	(void)nfssvc(NFSSVC_RESUMENFSD, NULL);
 }
 
 /*
  * Allocate a zero-filled export list element.  Allocation failure is
  * handed to out_of_mem().
  */
 static struct exportlist *
 get_exp(void)
 {
 	struct exportlist *elem = calloc(1, sizeof(*elem));
 
 	if (elem == NULL)
 		out_of_mem();
 	return (elem);
 }
 
 /*
  * Allocate a zero-filled group list element.  Allocation failure is
  * handed to out_of_mem().
  */
 static struct grouplist *
 get_grp(void)
 {
 	struct grouplist *elem = calloc(1, sizeof(*elem));
 
 	if (elem == NULL)
 		out_of_mem();
 	return (elem);
 }
 
 /*
  * Clean up after a bad exports line.
  *
  * Logs the offending line unless OP_QUIET is set in opt_flags, frees ep
  * if it has not been linked into the export list (EX_LINKED clear), and
  * frees every element of the group list starting at grp.
  */
 static void
 getexp_err(struct exportlist *ep, struct grouplist *grp)
 {
 	struct grouplist *next;
 
 	if ((opt_flags & OP_QUIET) == 0)
 		syslog(LOG_ERR, "bad exports list line %s", line);
 	if (ep != NULL && (ep->ex_flag & EX_LINKED) == 0)
 		free_exp(ep);
 	for (; grp != NULL; grp = next) {
 		next = grp->gr_next;
 		free_grp(grp);
 	}
 }
 
 /*
  * Search the export list for the entry whose file system id matches fsid
  * (both val[] words are compared).  Returns the entry, or NULL when there
  * is none.
  */
 static struct exportlist *
 ex_search(fsid_t *fsid)
 {
 	struct exportlist *cur;
 
 	for (cur = exphead; cur != NULL; cur = cur->ex_next) {
 		if (cur->ex_fs.val[0] != fsid->val[0])
 			continue;
 		if (cur->ex_fs.val[1] != fsid->val[1])
 			continue;
 		return (cur);
 	}
 	return (NULL);
 }
 
 /*
  * Add a directory path to the front of the list at *dpp.
  *
  * A new dirlist node with len extra bytes is allocated, the
  * NUL-terminated string cp is copied into its dp_dirp, and the previous
  * head becomes the node's dp_left.  Returns a pointer to the stored path.
  */
 static char *
 add_expdir(struct dirlist **dpp, char *cp, int len)
 {
 	struct dirlist *node;
 
 	node = malloc(sizeof (struct dirlist) + len);
 	if (node == NULL)
 		out_of_mem();
 	node->dp_flag = 0;
 	node->dp_hosts = NULL;
 	node->dp_right = NULL;
 	node->dp_left = *dpp;
 	strcpy(node->dp_dirp, cp);
 	*dpp = node;
 	return (node->dp_dirp);
 }
 
 /*
  * Hang the dir list element(s) off the export point as required and
  * record the host set and security flavors.
  *
  * Without OP_ALLDIRS, each element of the dp list (chained through
  * dp_left) is inserted into ep's directory tree with add_dlist().
  * With OP_ALLDIRS, dp becomes ep's default directory, or is freed if ep
  * already has one.  Then either the default is flagged DP_DEFSET
  * (grp == NULL) or every group in grp is attached to the default
  * directory's host list.
  */
 static void
 hang_dirp(struct dirlist *dp, struct grouplist *grp, struct exportlist *ep,
 	int flags)
 {
 	struct hostlist *hp;
 	struct dirlist *next;
 
 	if ((flags & OP_ALLDIRS) == 0) {
 		/*
 		 * Loop through the directories adding them to the tree.
 		 * dp_left is fetched first because add_dlist() may free
 		 * or relink the node.
 		 */
 		for (; dp != NULL; dp = next) {
 			next = dp->dp_left;
 			add_dlist(&ep->ex_dirl, dp, grp, flags, ep);
 		}
 		return;
 	}
 
 	if (ep->ex_defdir)
 		free((caddr_t)dp);
 	else
 		ep->ex_defdir = dp;
 
 	if (grp == (struct grouplist *)NULL) {
 		ep->ex_defdir->dp_flag |= DP_DEFSET;
 		/* Save the default security flavors list. */
 		ep->ex_defnumsecflavors = ep->ex_numsecflavors;
 		if (ep->ex_numsecflavors > 0)
 			memcpy(ep->ex_defsecflavors, ep->ex_secflavors,
 			    sizeof(ep->ex_secflavors));
 		return;
 	}
 
 	for (; grp != NULL; grp = grp->gr_next) {
 		hp = get_ht();
 		hp->ht_grp = grp;
 		hp->ht_next = ep->ex_defdir->dp_hosts;
 		ep->ex_defdir->dp_hosts = hp;
 		/* Save the security flavors list for this host set. */
 		grp->gr_numsecflavors = ep->ex_numsecflavors;
 		if (ep->ex_numsecflavors > 0)
 			memcpy(grp->gr_secflavors, ep->ex_secflavors,
 			    sizeof(ep->ex_secflavors));
 	}
 }
 
 /*
  * Insert newdp into the binary tree at *dpp, which is ordered by strcmp()
  * on dp_dirp, or update the node already present for that path.
  *
  * If a node with the same path exists, newdp is freed and the existing
  * node is updated instead.  The chosen node then either has every group
  * in grp pushed onto its host list (recording ep's security flavors in
  * each group) or, when grp is NULL, is flagged DP_DEFSET and ep's default
  * security flavors are saved.
  */
 static void
 add_dlist(struct dirlist **dpp, struct dirlist *newdp, struct grouplist *grp,
 	int flags, struct exportlist *ep)
 {
 	struct dirlist *node;
 	struct hostlist *hp;
 	int cmp;
 
 	/* Descend until an equal node or an empty slot is found. */
 	while ((node = *dpp) != NULL) {
 		cmp = strcmp(node->dp_dirp, newdp->dp_dirp);
 		if (cmp > 0)
 			dpp = &node->dp_left;
 		else if (cmp < 0)
 			dpp = &node->dp_right;
 		else
 			break;
 	}
 
 	if (node != NULL) {
 		free((caddr_t)newdp);
 	} else {
 		node = newdp;
 		node->dp_left = (struct dirlist *)NULL;
 		*dpp = node;
 	}
 
 	if (grp == NULL) {
 		node->dp_flag |= DP_DEFSET;
 		/* Save the default security flavors list. */
 		ep->ex_defnumsecflavors = ep->ex_numsecflavors;
 		if (ep->ex_numsecflavors > 0)
 			memcpy(ep->ex_defsecflavors, ep->ex_secflavors,
 			    sizeof(ep->ex_secflavors));
 		return;
 	}
 
 	/*
 	 * Hang all of the host(s) off of the directory point.
 	 */
 	for (; grp != NULL; grp = grp->gr_next) {
 		hp = get_ht();
 		hp->ht_grp = grp;
 		hp->ht_next = node->dp_hosts;
 		node->dp_hosts = hp;
 		/* Save the security flavors list for this host set. */
 		grp->gr_numsecflavors = ep->ex_numsecflavors;
 		if (ep->ex_numsecflavors > 0)
 			memcpy(grp->gr_secflavors, ep->ex_secflavors,
 			    sizeof(ep->ex_secflavors));
 	}
 }
 
 /*
  * Search the directory tree rooted at dp for the node whose path equals
  * dirp.  Returns the node, or NULL if the path is not in the tree.
  */
 static struct dirlist *
 dirp_search(struct dirlist *dp, char *dirp)
 {
 	int cmp;
 
 	while (dp != NULL) {
 		cmp = strcmp(dp->dp_dirp, dirp);
 		if (cmp == 0)
 			break;
 		dp = (cmp > 0) ? dp->dp_left : dp->dp_right;
 	}
 	return (dp);
 }
 
 /*
  * Check whether saddr matches any host or network attached to the single
  * directory node dp (subtrees are not visited here).
  *
  * If dp has DP_DEFSET set, *defsetp receives dp->dp_flag.  On a match,
  * *hostsetp receives the host entry's flags with DP_HOSTSET or'ed in and,
  * when numsecflavors is not NULL, *numsecflavors and *secflavorsp receive
  * the matching group's security flavor list.
  * Returns 1 on a match, 0 otherwise (including dp == NULL).
  */
 static int
 chk_host(struct dirlist *dp, struct sockaddr *saddr, int *defsetp,
 	int *hostsetp, int *numsecflavors, int **secflavorsp)
 {
 	struct hostlist *hp;
 	struct grouplist *grp;
 	struct addrinfo *ai;
 	int matched;
 
 	if (dp == NULL)
 		return (0);
 
 	if (dp->dp_flag & DP_DEFSET)
 		*defsetp = dp->dp_flag;
 
 	for (hp = dp->dp_hosts; hp != NULL; hp = hp->ht_next) {
 		grp = hp->ht_grp;
 		matched = 0;
 		if (grp->gr_type == GT_HOST) {
 			/* Any one of the host's addresses is enough. */
 			for (ai = grp->gr_ptr.gt_addrinfo;
 			    ai != NULL && !matched; ai = ai->ai_next) {
 				if (!sacmp(ai->ai_addr, saddr, NULL))
 					matched = 1;
 			}
 		} else if (grp->gr_type == GT_NET) {
 			if (!sacmp(saddr,
 			    (struct sockaddr *)&grp->gr_ptr.gt_net.nt_net,
 			    (struct sockaddr *)&grp->gr_ptr.gt_net.nt_mask))
 				matched = 1;
 		}
 		if (!matched)
 			continue;
 
 		*hostsetp = (hp->ht_flag | DP_HOSTSET);
 		if (numsecflavors != NULL) {
 			*numsecflavors = grp->gr_numsecflavors;
 			*secflavorsp = grp->gr_secflavors;
 		}
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Scan the whole tree rooted at dp, in order, for a host that matches the
  * address saddr.  Returns 1 as soon as chk_host() reports a match on any
  * node, 0 if no node matches.
  */
 static int
 scan_tree(struct dirlist *dp, struct sockaddr *saddr)
 {
 	int defset, hostset;	/* required by chk_host(); values unused */
 
 	if (dp == NULL)
 		return (0);
 	return (scan_tree(dp->dp_left, saddr) ||
 	    chk_host(dp, saddr, &defset, &hostset, NULL, NULL) ||
 	    scan_tree(dp->dp_right, saddr));
 }
 
 /*
  * Free the dirlist tree rooted at dp: both subtrees first, then the
  * node's host list and the node itself.  A NULL dp is a no-op.
  */
 static void
 free_dir(struct dirlist *dp)
 {
 	if (dp == NULL)
 		return;
 	free_dir(dp->dp_left);
 	free_dir(dp->dp_right);
 	free_host(dp->dp_hosts);
 	free((caddr_t)dp);
 }
 
 /*
  * Parse a colon separated list of security flavors into ep.
  *
  * Recognized names are "sys", "krb5", "krb5i" and "krb5p".  The list is
  * split in place but every ':' is put back before returning.  At most
  * MAXSECFLAVORS entries are accepted.
  * Returns 0 on success, 1 (after logging) on an unknown name or too many
  * entries; ep->ex_numsecflavors then holds the count parsed so far.
  */
 static int
 parsesec(char *seclist, struct exportlist *ep)
 {
 	const struct {
 		const char *name;
 		int flavor;
 	} known[] = {
 		{ "sys", AUTH_SYS },
 		{ "krb5", RPCSEC_GSS_KRB5 },
 		{ "krb5i", RPCSEC_GSS_KRB5I },
 		{ "krb5p", RPCSEC_GSS_KRB5P },
 	};
 	char *sep;
 	int flavor, found;
 	size_t k;
 
 	ep->ex_numsecflavors = 0;
 	for (;;) {
 		/* Isolate the current name by cutting at the next ':'. */
 		sep = strchr(seclist, ':');
 		if (sep != NULL)
 			*sep = '\0';
 
 		flavor = 0;
 		found = 0;
 		for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
 			if (strcmp(seclist, known[k].name) == 0) {
 				flavor = known[k].flavor;
 				found = 1;
 				break;
 			}
 		}
 
 		if (!found || ep->ex_numsecflavors == MAXSECFLAVORS) {
 			/* Restore the separator so the log shows the rest. */
 			if (sep != NULL)
 				*sep = ':';
 			if (!found)
 				syslog(LOG_ERR, "bad sec flavor: %s", seclist);
 			else
 				syslog(LOG_ERR, "too many sec flavors: %s",
 				    seclist);
 			return (1);
 		}
 
 		ep->ex_secflavors[ep->ex_numsecflavors] = flavor;
 		ep->ex_numsecflavors++;
 		if (sep == NULL)
 			break;
 		*sep = ':';
 		seclist = sep + 1;
 	}
 	return (0);
 }
 
 /*
  * Parse the option string and update fields.
  * Option arguments may either be -<option>=<value> or
  * -<option> <value>
  *
  * cpp/endcpp delimit the option field (starting at its '-') inside the
  * line buffer; the field may hold several comma separated options.  The
  * buffer is edited in place while parsing (separators are overwritten
  * with NULs); the saved characters are put back on the success paths.
  * Results are stored through ep, grp, has_hostp, exflagsp and cr and in
  * the global opt_flags.  When an option consumes the following field as
  * its argument, *cpp and *endcpp are advanced to that field.
  * Returns 0 on success, 1 on a bad option or argument.
  */
 static int
 do_opt(char **cpp, char **endcpp, struct exportlist *ep, struct grouplist *grp,
 	int *has_hostp, int *exflagsp, struct xucred *cr)
 {
 	char *cpoptarg, *cpoptend;
 	char *cp, *endcp, *cpopt, savedc, savedc2;
 	int allflag, usedarg;
 
 	savedc2 = '\0';
 	cpopt = *cpp;
 	/* Step over the leading '-'. */
 	cpopt++;
 	/* NUL-terminate the option field; savedc is restored before returning 0. */
 	cp = *endcpp;
 	savedc = *cp;
 	*cp = '\0';
 	while (cpopt && *cpopt) {
 		allflag = 1;
 		/*
 		 * usedarg: -2 while no following field has been captured as
 		 * a candidate argument, 0 once one has; options that consume
 		 * their argument increment it.
 		 */
 		usedarg = -2;
 		if ((cpoptend = strchr(cpopt, ','))) {
 			*cpoptend++ = '\0';
 			if ((cpoptarg = strchr(cpopt, '=')))
 				*cpoptarg++ = '\0';
 		} else {
 			if ((cpoptarg = strchr(cpopt, '=')))
 				*cpoptarg++ = '\0';
 			else {
 				/*
 				 * Last option without "=value": look at the
 				 * next field; unless it starts with '-' it
 				 * becomes the candidate argument.
 				 */
 				*cp = savedc;
 				nextfield(&cp, &endcp);
 				**endcpp = '\0';
 				if (endcp > cp && *cp != '-') {
 					cpoptarg = cp;
 					savedc2 = *endcp;
 					*endcp = '\0';
 					usedarg = 0;
 				}
 			}
 		}
 		if (!strcmp(cpopt, "ro") || !strcmp(cpopt, "o")) {
 			*exflagsp |= MNT_EXRDONLY;
 		} else if (cpoptarg && (!strcmp(cpopt, "maproot") ||
 		    !(allflag = strcmp(cpopt, "mapall")) ||
 		    !strcmp(cpopt, "root") || !strcmp(cpopt, "r"))) {
 			/* allflag ends up 0 only when the option was "mapall". */
 			usedarg++;
 			parsecred(cpoptarg, cr);
 			if (allflag == 0) {
 				*exflagsp |= MNT_EXPORTANON;
 				opt_flags |= OP_MAPALL;
 			} else
 				opt_flags |= OP_MAPROOT;
 		} else if (cpoptarg && (!strcmp(cpopt, "mask") ||
 		    !strcmp(cpopt, "m"))) {
 			if (get_net(cpoptarg, &grp->gr_ptr.gt_net, 1)) {
 				syslog(LOG_ERR, "bad mask: %s", cpoptarg);
 				return (1);
 			}
 			usedarg++;
 			opt_flags |= OP_MASK;
 		} else if (cpoptarg && (!strcmp(cpopt, "network") ||
 			!strcmp(cpopt, "n"))) {
 			if (strchr(cpoptarg, '/') != NULL) {
 				if (debug)
 					fprintf(stderr, "setting OP_MASKLEN\n");
 				opt_flags |= OP_MASKLEN;
 			}
 			if (grp->gr_type != GT_NULL) {
 				syslog(LOG_ERR, "network/host conflict");
 				return (1);
 			} else if (get_net(cpoptarg, &grp->gr_ptr.gt_net, 0)) {
 				syslog(LOG_ERR, "bad net: %s", cpoptarg);
 				return (1);
 			}
 			grp->gr_type = GT_NET;
 			*has_hostp = 1;
 			usedarg++;
 			opt_flags |= OP_NET;
 		} else if (!strcmp(cpopt, "alldirs")) {
 			opt_flags |= OP_ALLDIRS;
 		} else if (!strcmp(cpopt, "public")) {
 			*exflagsp |= MNT_EXPUBLIC;
 		} else if (!strcmp(cpopt, "webnfs")) {
 			*exflagsp |= (MNT_EXPUBLIC|MNT_EXRDONLY|MNT_EXPORTANON);
 			opt_flags |= OP_MAPALL;
 		} else if (cpoptarg && !strcmp(cpopt, "index")) {
 			/*
 			 * NOTE(review): unlike the other argument-taking
 			 * options this branch does not increment usedarg, and
 			 * the strdup() result is not checked -- confirm both
 			 * are intended.
 			 */
 			ep->ex_indexfile = strdup(cpoptarg);
 		} else if (!strcmp(cpopt, "quiet")) {
 			opt_flags |= OP_QUIET;
 		} else if (cpoptarg && !strcmp(cpopt, "sec")) {
 			if (parsesec(cpoptarg, ep))
 				return (1);
 			opt_flags |= OP_SEC;
 			usedarg++;
 		} else {
 			syslog(LOG_ERR, "bad opt %s", cpopt);
 			return (1);
 		}
 		if (usedarg >= 0) {
 			/*
 			 * A following field was captured: restore both saved
 			 * characters and, if the option consumed it
 			 * (usedarg > 0), advance the caller past that field.
 			 */
 			*endcp = savedc2;
 			**endcpp = savedc;
 			if (usedarg > 0) {
 				*cpp = cp;
 				*endcpp = endcp;
 			}
 			return (0);
 		}
 		cpopt = cpoptend;
 	}
 	**endcpp = savedc;
 	return (0);
 }
 
 /*
  * Translate a character string to the corresponding list of network
  * addresses for a hostname.
  *
  * cp is the host name to resolve, grp the (still GT_NULL) group element to
  * fill in, and tgrp the head of the current line's group list, which is
  * searched for duplicates.  On success grp->gr_ptr.gt_addrinfo holds the
  * getaddrinfo() result and grp becomes GT_HOST, or GT_IGNORE when one of
  * its addresses already appears in a GT_HOST group of tgrp.  Every address
  * entry without a canonical name is given its numeric form (or "?").
  * Returns 0 on success and 1 if grp is already typed or the lookup fails.
  */
 static int
 get_host(char *cp, struct grouplist *grp, struct grouplist *tgrp)
 {
 	struct grouplist *checkgrp;
 	struct addrinfo *ai, *tai, hints;
 	int ecode;
 	char host[NI_MAXHOST];
 
 	if (grp->gr_type != GT_NULL) {
 		syslog(LOG_ERR, "Bad netgroup type for ip host %s", cp);
 		return (1);
 	}
 	memset(&hints, 0, sizeof hints);
 	hints.ai_flags = AI_CANONNAME;
 	hints.ai_protocol = IPPROTO_UDP;
 	ecode = getaddrinfo(cp, NULL, &hints, &ai);
 	if (ecode != 0) {
 		syslog(LOG_ERR,"can't get address info for host %s", cp);
 		return 1;
 	}
 	grp->gr_ptr.gt_addrinfo = ai;
 	while (ai != NULL) {
 		if (ai->ai_canonname == NULL) {
 			if (getnameinfo(ai->ai_addr, ai->ai_addrlen, host,
 			    sizeof host, NULL, 0, NI_NUMERICHOST) != 0)
 				strlcpy(host, "?", sizeof(host));
 			ai->ai_canonname = strdup(host);
 			/*
 			 * The name is printed below and sent to clients in
 			 * the export list, so it must never be left NULL.
 			 */
 			if (ai->ai_canonname == NULL)
 				out_of_mem();
 			ai->ai_flags |= AI_CANONNAME;
 		}
 		if (debug)
 			fprintf(stderr, "got host %s\n", ai->ai_canonname);
 		/*
 		 * Sanity check: make sure we don't already have an entry
 		 * for this host in the grouplist.
 		 */
 		for (checkgrp = tgrp; checkgrp != NULL;
 		    checkgrp = checkgrp->gr_next) {
 			if (checkgrp->gr_type != GT_HOST)
 				continue;
 			for (tai = checkgrp->gr_ptr.gt_addrinfo; tai != NULL;
 			    tai = tai->ai_next) {
 				if (sacmp(tai->ai_addr, ai->ai_addr, NULL) != 0)
 					continue;
 				if (debug)
 					fprintf(stderr,
 					    "ignoring duplicate host %s\n",
 					    ai->ai_canonname);
 				/*
 				 * NOTE(review): the addrinfo list stays
 				 * attached to a GT_IGNORE group here; confirm
 				 * free_grp() releases it for that type.
 				 */
 				grp->gr_type = GT_IGNORE;
 				return (0);
 			}
 		}
 		ai = ai->ai_next;
 	}
 	grp->gr_type = GT_HOST;
 	return (0);
 }
 
 /*
  * Free up an exports list component
  */
 static void
 free_exp(struct exportlist *ep)
 {
 
 	if (ep->ex_defdir) {
 		free_host(ep->ex_defdir->dp_hosts);
 		free((caddr_t)ep->ex_defdir);
 	}
 	if (ep->ex_fsdir)
 		free(ep->ex_fsdir);
 	if (ep->ex_indexfile)
 		free(ep->ex_indexfile);
 	free_dir(ep->ex_dirl);
 	free((caddr_t)ep);
 }
 
 /*
  * Free hosts.
  */
 static void
 free_host(struct hostlist *hp)
 {
 	struct hostlist *hp2;
 
 	while (hp) {
 		hp2 = hp;
 		hp = hp->ht_next;
 		free((caddr_t)hp2);
 	}
 }
 
 /*
  * Allocate a host list element with ht_next and ht_flag cleared; ht_grp
  * is left for the caller to set.  Allocation failure is handed to
  * out_of_mem().
  */
 static struct hostlist *
 get_ht(void)
 {
 	struct hostlist *elem = malloc(sizeof(*elem));
 
 	if (elem == NULL)
 		out_of_mem();
 	elem->ht_flag = 0;
 	elem->ht_next = NULL;
 	return (elem);
 }
 
 /*
  * Fatal allocation failure: log "out of memory" at LOG_ERR and terminate
  * the process with exit status 2.  Does not return.
  */
 static void
 out_of_mem(void)
 {
 	syslog(LOG_ERR, "out of memory");
 	exit(2);
 }
 
 /*
  * Do the nmount() syscall with the update flag to push the export info into
  * the kernel.
  */
 static int
 do_mount(struct exportlist *ep, struct grouplist *grp, int exflags,
     struct xucred *anoncrp, char *dirp, int dirplen, struct statfs *fsb)
 {
 	struct statfs fsb1;
 	struct addrinfo *ai;
 	struct export_args *eap;
 	char errmsg[255];
 	char *cp;
 	int done;
 	char savedc;
 	struct iovec *iov;
 	int i, iovlen;
 	int ret;
 	struct nfsex_args nfsea;
 
 	/*
 	 * Parameters, as used below:
 	 *   ep       - export entry; supplies ex_indexfile and the security
 	 *              flavor list copied into the export arguments.
 	 *   grp      - group whose gr_type selects what is exported:
 	 *              GT_HOST walks every addrinfo in the list, GT_NET uses
 	 *              the net/mask pair, GT_DEFAULT exports without an
 	 *              address, GT_IGNORE does nothing.
 	 *   exflags  - stored in ex_flags; MNT_EXPUBLIC additionally triggers
 	 *              the public file handle setup.
 	 *   anoncrp  - credentials copied into ex_anon.
 	 *   dirp     - writable path of dirplen bytes.  While retrying it is
 	 *              temporarily truncated at `cp'; the saved byte is put
 	 *              back before returning through the paths below.
 	 *   fsb      - statfs data naming the filesystem handed to nmount().
 	 *
 	 * Returns 0 on success (and for GT_IGNORE), 1 on failure, and 2 when
 	 * the NFSSVC_V4ROOTEXPORT call fails.
 	 */
 	eap = &nfsea.export;
 
 	cp = NULL;
 	savedc = '\0';
 	iov = NULL;
 	iovlen = 0;
 	ret = 0;
 
 	bzero(eap, sizeof (struct export_args));
 	bzero(errmsg, sizeof(errmsg));
 	eap->ex_flags = exflags;
 	eap->ex_anon = *anoncrp;
 	eap->ex_indexfile = ep->ex_indexfile;
 	if (grp->gr_type == GT_HOST)
 		ai = grp->gr_ptr.gt_addrinfo;
 	else
 		ai = NULL;
 	eap->ex_numsecflavors = ep->ex_numsecflavors;
 	for (i = 0; i < eap->ex_numsecflavors; i++)
 		eap->ex_secflavors[i] = ep->ex_secflavors[i];
 	/* No flavor requested: fall back to a single AUTH_SYS entry. */
 	if (eap->ex_numsecflavors == 0) {
 		eap->ex_numsecflavors = 1;
 		eap->ex_secflavors[0] = AUTH_SYS;
 	}
 	done = FALSE;
 
 	/*
 	 * The iovec is only built for v4root_phase 0.  The name/value slots
 	 * are laid out pairwise: even indexes hold the option names, odd
 	 * indexes the values that are filled in further down.
 	 */
 	if (v4root_phase == 0) {
 		build_iovec(&iov, &iovlen, "fstype", NULL, 0);
 		build_iovec(&iov, &iovlen, "fspath", NULL, 0);
 		build_iovec(&iov, &iovlen, "from", NULL, 0);
 		build_iovec(&iov, &iovlen, "update", NULL, 0);
 		build_iovec(&iov, &iovlen, "export", eap,
 		    sizeof (struct export_args));
 		build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg));
 	}
 
 	/* One pass per address for GT_HOST, a single pass otherwise. */
 	while (!done) {
 		switch (grp->gr_type) {
 		case GT_HOST:
 			if (ai->ai_addr->sa_family == AF_INET6 && have_v6 == 0)
 				goto skip;
 			eap->ex_addr = ai->ai_addr;
 			eap->ex_addrlen = ai->ai_addrlen;
 			eap->ex_masklen = 0;
 			break;
 		case GT_NET:
 			if (grp->gr_ptr.gt_net.nt_net.ss_family == AF_INET6 &&
 			    have_v6 == 0)
 				goto skip;
 			eap->ex_addr =
 			    (struct sockaddr *)&grp->gr_ptr.gt_net.nt_net;
 			eap->ex_addrlen =
 			    ((struct sockaddr *)&grp->gr_ptr.gt_net.nt_net)->sa_len;
 			eap->ex_mask =
 			    (struct sockaddr *)&grp->gr_ptr.gt_net.nt_mask;
 			eap->ex_masklen = ((struct sockaddr *)&grp->gr_ptr.gt_net.nt_mask)->sa_len;
 			break;
 		case GT_DEFAULT:
 			eap->ex_addr = NULL;
 			eap->ex_addrlen = 0;
 			eap->ex_mask = NULL;
 			eap->ex_masklen = 0;
 			break;
 		case GT_IGNORE:
 			ret = 0;
 			goto error_exit;
 			break;
 		default:
 			syslog(LOG_ERR, "bad grouptype");
 			if (cp)
 				*cp = savedc;
 			ret = 1;
 			goto error_exit;
 		};
 
 		/*
 		 * For V4:, use the nfssvc() syscall, instead of mount().
 		 */
 		if (v4root_phase == 2) {
 			nfsea.fspec = v4root_dirpath;
 			if (nfssvc(NFSSVC_V4ROOTEXPORT, (caddr_t)&nfsea) < 0) {
 				syslog(LOG_ERR, "Exporting V4: failed");
 				return (2);
 			}
 		} else {
 			/*
 			 * XXX:
 			 * Maybe I should just use the fsb->f_mntonname path
 			 * instead of looping back up the dirp to the mount
 			 * point??
 			 * Also, needs to know how to export all types of local
 			 * exportable filesystems and not just "ufs".
 			 */
 			iov[1].iov_base = fsb->f_fstypename; /* "fstype" */
 			iov[1].iov_len = strlen(fsb->f_fstypename) + 1;
 			iov[3].iov_base = fsb->f_mntonname; /* "fspath" */
 			iov[3].iov_len = strlen(fsb->f_mntonname) + 1;
 			iov[5].iov_base = fsb->f_mntfromname; /* "from" */
 			iov[5].iov_len = strlen(fsb->f_mntfromname) + 1;
 			errmsg[0] = '\0';
 
 			/*
 			 * On each failure dirp is shortened by one component
 			 * and must still statfs() to the same f_fsid before
 			 * nmount() is tried again.
 			 */
 			while (nmount(iov, iovlen, fsb->f_flags) < 0) {
 				/* Undo the previous truncation, if any. */
 				if (cp)
 					*cp-- = savedc;
 				else
 					cp = dirp + dirplen - 1;
 				if (opt_flags & OP_QUIET) {
 					ret = 1;
 					goto error_exit;
 				}
 				if (errno == EPERM) {
 					if (debug)
 						warnx("can't change attributes for %s: %s",
 						    dirp, errmsg);
 					syslog(LOG_ERR,
 					   "can't change attributes for %s: %s",
 					    dirp, errmsg);
 					ret = 1;
 					goto error_exit;
 				}
 				if (opt_flags & OP_ALLDIRS) {
 					if (errno == EINVAL)
 						syslog(LOG_ERR,
 		"-alldirs requested but %s is not a filesystem mountpoint",
 						    dirp);
 					else
 						syslog(LOG_ERR,
 						    "could not remount %s: %m",
 						    dirp);
 					ret = 1;
 					goto error_exit;
 				}
 				/*
 				 * Back up over the last component.  The bound
 				 * is tested first so that neither *cp nor
 				 * *(cp - 1) is read outside of dirp.
 				 */
 				while (cp > dirp && *cp == '/')
 					cp--;
 				while (cp > dirp && *(cp - 1) != '/')
 					cp--;
 				if (cp == dirp) {
 					if (debug)
 						warnx("mnt unsucc");
 					syslog(LOG_ERR, "can't export %s %s",
 					    dirp, errmsg);
 					ret = 1;
 					goto error_exit;
 				}
 				savedc = *cp;
 				*cp = '\0';
 				/*
 				 * Check that we're still on the same
 				 * filesystem.
 				 */
 				if (statfs(dirp, &fsb1) != 0 ||
 				    bcmp(&fsb1.f_fsid, &fsb->f_fsid,
 				    sizeof (fsb1.f_fsid)) != 0) {
 					*cp = savedc;
 					syslog(LOG_ERR,
 					    "can't export %s %s", dirp,
 					    errmsg);
 					ret = 1;
 					goto error_exit;
 				}
 			}
 		}
 
 		/*
 		 * For the experimental server:
 		 * If this is the public directory, get the file handle
 		 * and load it into the kernel via the nfssvc() syscall.
 		 */
 		if ((exflags & MNT_EXPUBLIC) != 0) {
 			fhandle_t fh;
 			char *public_name;
 
 			if (eap->ex_indexfile != NULL)
 				public_name = eap->ex_indexfile;
 			else
 				public_name = dirp;
 			if (getfh(public_name, &fh) < 0)
 				syslog(LOG_ERR,
 				    "Can't get public fh for %s", public_name);
 			else if (nfssvc(NFSSVC_PUBLICFH, (caddr_t)&fh) < 0)
 				syslog(LOG_ERR,
 				    "Can't set public fh for %s", public_name);
 			else
 				has_publicfh = 1;
 		}
 skip:
 		/* Advance to the next host address; stop when there is none. */
 		if (ai != NULL)
 			ai = ai->ai_next;
 		if (ai == NULL)
 			done = TRUE;
 	}
 	if (cp)
 		*cp = savedc;
 error_exit:
 	/* free strings allocated by strdup() in getmntopts.c */
 	if (iov != NULL) {
 		free(iov[0].iov_base); /* fstype */
 		free(iov[2].iov_base); /* fspath */
 		free(iov[4].iov_base); /* from */
 		free(iov[6].iov_base); /* update */
 		free(iov[8].iov_base); /* export */
 		free(iov[10].iov_base); /* errmsg */
 
 		/* free iov, allocated by realloc() */
 		free(iov);
 	}
 	return (ret);
 }
 
 /*
  * Translate a net address.
  *
  * If `maskflg' is nonzero, then `cp' is a netmask, not a network address.
  *
  * `cp' is tried as a numeric address first (getaddrinfo() with
  * AI_NUMERICHOST) and then as a network name (getnetbyname()).  The result
  * is copied into net->nt_mask (maskflg set) or net->nt_net (maskflg clear).
  * For a network address net->nt_name is set to a strdup()ed name and a mask
  * is derived from the "/<masklen>" suffix or, for IPv4 without -mask, from
  * the address class.  OP_HAVEMASK is or'ed into opt_flags whenever a mask
  * has been stored.
  *
  * Returns 0 on success and 1 on failure.
  */
 static int
 get_net(char *cp, struct netmsk *net, int maskflg)
 {
 	struct netent *np = NULL;
 	char *name, *p, *prefp;
 	struct sockaddr_in sin;
 	struct sockaddr *sa = NULL;
 	struct addrinfo hints, *ai = NULL;
 	char netname[NI_MAXHOST];
 	long preflen;
 
 	p = prefp = NULL;
 	/*
 	 * Split "net/len" at the slash so that `cp' holds only the address.
 	 * NOTE(review): the strchr() result is used unchecked; this assumes
 	 * OP_MASKLEN is only set when `cp' contains a '/' - confirm in caller.
 	 */
 	if ((opt_flags & OP_MASKLEN) && !maskflg) {
 		p = strchr(cp, '/');
 		*p = '\0';
 		prefp = p + 1;
 	}
 
 	/*
 	 * Check for a numeric address first. We wish to avoid
 	 * possible DNS lookups in getnetbyname().
 	 */
 	if (isxdigit(*cp) || *cp == ':') {
 		memset(&hints, 0, sizeof hints);
 		/* Ensure the mask and the network have the same family. */
 		if (maskflg && (opt_flags & OP_NET))
 			hints.ai_family = net->nt_net.ss_family;
 		else if (!maskflg && (opt_flags & OP_HAVEMASK))
 			hints.ai_family = net->nt_mask.ss_family;
 		else
 			hints.ai_family = AF_UNSPEC;
 		hints.ai_flags = AI_NUMERICHOST;
 		if (getaddrinfo(cp, NULL, &hints, &ai) == 0)
 			sa = ai->ai_addr;
 		if (sa != NULL && ai->ai_family == AF_INET) {
 			/*
 			 * The address in `cp' is really a network address, so
 			 * use inet_network() to re-interpret this correctly.
 			 * e.g. "127.1" means 127.1.0.0, not 127.0.0.1.
 			 */
 			bzero(&sin, sizeof sin);
 			sin.sin_family = AF_INET;
 			sin.sin_len = sizeof sin;
 			sin.sin_addr = inet_makeaddr(inet_network(cp), 0);
 			if (debug)
 				fprintf(stderr, "get_net: v4 addr %s\n",
 				    inet_ntoa(sin.sin_addr));
 			sa = (struct sockaddr *)&sin;
 		}
 	}
 	/* Not numeric (or not parseable): fall back to a network name. */
 	if (sa == NULL && (np = getnetbyname(cp)) != NULL) {
 		bzero(&sin, sizeof sin);
 		sin.sin_family = AF_INET;
 		sin.sin_len = sizeof sin;
 		sin.sin_addr = inet_makeaddr(np->n_net, 0);
 		sa = (struct sockaddr *)&sin;
 	}
 	if (sa == NULL)
 		goto fail;
 
 	if (maskflg) {
 		/* The specified sockaddr is a mask. */
 		if (checkmask(sa) != 0)
 			goto fail;
 		bcopy(sa, &net->nt_mask, sa->sa_len);
 		opt_flags |= OP_HAVEMASK;
 	} else {
 		/* The specified sockaddr is a network address. */
 		bcopy(sa, &net->nt_net, sa->sa_len);
 
 		/* Get a network name for the export list. */
 		if (np) {
 			name = np->n_name;
 		} else if (getnameinfo(sa, sa->sa_len, netname, sizeof netname,
 		   NULL, 0, NI_NUMERICHOST) == 0) {
 			name = netname;
 		} else {
 			goto fail;
 		}
 		if ((net->nt_name = strdup(name)) == NULL)
 			out_of_mem();
 
 		/*
 		 * Extract a mask from either a "/<masklen>" suffix, or
 		 * from the class of an IPv4 address.
 		 */
 		if (opt_flags & OP_MASKLEN) {
 			/*
 			 * NOTE(review): no end pointer is passed, so trailing
 			 * garbage after the digits is not detected here.
 			 */
 			preflen = strtol(prefp, NULL, 10);
 			if (preflen < 0L || preflen == LONG_MAX)
 				goto fail;
 			/* Copy the address so the family/length fields are set. */
 			bcopy(sa, &net->nt_mask, sa->sa_len);
 			if (makemask(&net->nt_mask, (int)preflen) != 0)
 				goto fail;
 			opt_flags |= OP_HAVEMASK;
 			/* Put back the '/' that was overwritten above. */
 			*p = '/';
 		} else if (sa->sa_family == AF_INET &&
 		    (opt_flags & OP_MASK) == 0) {
 			in_addr_t addr;
 
 			addr = ((struct sockaddr_in *)sa)->sin_addr.s_addr;
 			if (IN_CLASSA(addr))
 				preflen = 8;
 			else if (IN_CLASSB(addr))
 				preflen = 16;
 			else if (IN_CLASSC(addr))
 				preflen = 24;
 			else if (IN_CLASSD(addr))
 				preflen = 28;
 			else
 				preflen = 32;	/* XXX */
 
 			bcopy(sa, &net->nt_mask, sa->sa_len);
 			makemask(&net->nt_mask, (int)preflen);
 			opt_flags |= OP_HAVEMASK;
 		}
 	}
 
 	if (ai)
 		freeaddrinfo(ai);
 	return 0;
 
 fail:
 	/*
 	 * NOTE(review): the '/' replaced at the top is only restored on the
 	 * successful OP_MASKLEN path, and a net->nt_name assigned above is
 	 * left in place - confirm the caller copes with both on failure.
 	 */
 	if (ai)
 		freeaddrinfo(ai);
 	return 1;
 }
 
 /*
  * Locate the next blank-separated field.
  *
  * On entry *cp points into a NUL-terminated line.  On return *cp points at
  * the first character of the next field and *endcp one past its last
  * character.  If only blanks remain before a newline or the end of the
  * string, both are set to that terminating character (an empty field).
  */
 static void
 nextfield(char **cp, char **endcp)
 {
 	char *start, *end;
 
 	/* Skip leading spaces and tabs. */
 	for (start = *cp; *start == ' ' || *start == '\t'; start++)
 		continue;
 
 	/* The field runs up to the next blank, newline or NUL. */
 	end = start;
 	while (*end != ' ' && *end != '\t' && *end != '\n' && *end != '\0')
 		end++;
 
 	*cp = start;
 	*endcp = end;
 }
 
 /*
  * Read one logical line of the exports file into the global `line' buffer.
  *
  * Trailing blanks, newlines and backslashes are stripped from every physical
  * line; a stripped backslash marks a continuation and is replaced by a
  * single space before the next physical line is appended.  Lines that are
  * empty after stripping are skipped.  `line' is grown with realloc() as
  * needed and is always NUL-terminated.
  *
  * Returns 1 when a line is available and 0 once fgetln() reports no more
  * input.
  */
 static int
 get_line(void)
 {
 	char *raw, *tail;
 	size_t len;
 	int totlen, cont_line;
 
 	totlen = 0;
 	for (;;) {
 		raw = fgetln(exp_file, &len);
 		if (raw == NULL)
 			return (0);
 
 		/* Trim from the right, noting any backslash on the way. */
 		cont_line = 0;
 		for (tail = raw + len - 1; tail >= raw; tail--, len--) {
 			if (*tail == '\\')
 				cont_line = 1;
 			else if (*tail != ' ' && *tail != '\t' &&
 			    *tail != '\n')
 				break;
 		}
 		/* A continuation contributes one separating space. */
 		if (cont_line) {
 			tail++;
 			*tail = ' ';
 			len++;
 		}
 
 		/* Make room for the new piece plus the terminator. */
 		if (linesize < len + totlen + 1) {
 			linesize = len + totlen + 1;
 			line = realloc(line, linesize);
 			if (line == NULL)
 				out_of_mem();
 		}
 		memcpy(line + totlen, raw, len);
 		totlen += len;
 		line[totlen] = '\0';
 
 		if (totlen != 0 && !cont_line)
 			break;
 	}
 	return (1);
 }
 
 /*
  * Parse a description of a credential.
  *
  * `namelist' is either "user" or "uid:gid:gid:..."; only its first
  * blank-separated token is used and the string is modified in place.
  * `cr' is first filled with the unprivileged defaults (uid -2, one group
  * -2) and then overwritten with the parsed values.  When the user or a
  * group cannot be resolved the problem is reported with syslog(); an
  * unknown user returns early and leaves the defaults (or the fields set so
  * far) in `cr'.
  */
 static void
 parsecred(char *namelist, struct xucred *cr)
 {
 	char *name;
 	int cnt;
 	char *names;
 	struct passwd *pw;
 	struct group *gr;
 	gid_t groups[XU_NGROUPS + 1];
 	int ngroups;
 
 	cr->cr_version = XUCRED_VERSION;
 	/*
 	 * Set up the unprivileged user.
 	 */
 	cr->cr_uid = -2;
 	cr->cr_groups[0] = -2;
 	cr->cr_ngroups = 1;
 	/*
 	 * Get the user's password table entry.
 	 */
-	names = strsep(&namelist, " \t\n");
+	names = strsep_quote(&namelist, " \t\n");
 	name = strsep(&names, ":");
+	/* Bug?  name could be NULL here */
 	/* A leading digit or '-' means a numeric uid, otherwise a login name. */
 	if (isdigit(*name) || *name == '-')
 		pw = getpwuid(atoi(name));
 	else
 		pw = getpwnam(name);
 	/*
 	 * Credentials specified as those of a user.
 	 * (strsep() left `names' NULL because there was no ':' in the token.)
 	 */
 	if (names == NULL) {
 		if (pw == NULL) {
 			syslog(LOG_ERR, "unknown user: %s", name);
 			return;
 		}
 		cr->cr_uid = pw->pw_uid;
 		ngroups = XU_NGROUPS + 1;
 		if (getgrouplist(pw->pw_name, pw->pw_gid, groups, &ngroups))
 			syslog(LOG_ERR, "too many groups");
 		/*
 		 * Compress out duplicate.
 		 * groups[1] is dropped and the rest shifted down by one;
 		 * presumably getgrouplist() reports the primary gid twice -
 		 * TODO confirm.
 		 */
 		cr->cr_ngroups = ngroups - 1;
 		cr->cr_groups[0] = groups[0];
 		for (cnt = 2; cnt < ngroups; cnt++)
 			cr->cr_groups[cnt - 1] = groups[cnt];
 		return;
 	}
 	/*
 	 * Explicit credential specified as a colon separated list:
 	 *	uid:gid:gid:...
 	 */
 	if (pw != NULL)
 		cr->cr_uid = pw->pw_uid;
 	else if (isdigit(*name) || *name == '-')
 		cr->cr_uid = atoi(name);
 	else {
 		syslog(LOG_ERR, "unknown user: %s", name);
 		return;
 	}
 	cr->cr_ngroups = 0;
 	/* Collect at most XU_NGROUPS groups; unknown names are skipped. */
 	while (names != NULL && *names != '\0' && cr->cr_ngroups < XU_NGROUPS) {
 		name = strsep(&names, ":");
 		if (isdigit(*name) || *name == '-') {
 			cr->cr_groups[cr->cr_ngroups++] = atoi(name);
 		} else {
 			if ((gr = getgrnam(name)) == NULL) {
 				syslog(LOG_ERR, "unknown group: %s", name);
 				continue;
 			}
 			cr->cr_groups[cr->cr_ngroups++] = gr->gr_gid;
 		}
 	}
 	/* Input left over after the table filled up. */
 	if (names != NULL && *names != '\0' && cr->cr_ngroups == XU_NGROUPS)
 		syslog(LOG_ERR, "too many groups");
 }
 
 #define	STRSIZ	(MNTNAMLEN+MNTPATHLEN+50)
 /*
  * Routines that maintain the remote mounttab
  */
 /*
  * Load _PATH_RMOUNTLIST and append one mountlist entry per "host dir" line
  * to the list headed by mlhead.  A missing file is not an error; any other
  * open failure is logged.  Lines without two fields are ignored.
  */
 static void
 get_mountlist(void)
 {
 	struct mountlist *entry, **tailp;
 	char *host, *dirp, *rest;
 	char buf[STRSIZ];
 	FILE *fp;
 
 	fp = fopen(_PATH_RMOUNTLIST, "r");
 	if (fp == NULL) {
 		if (errno != ENOENT)
 			syslog(LOG_ERR, "can't open %s", _PATH_RMOUNTLIST);
 		return;
 	}
 
 	for (tailp = &mlhead; fgets(buf, STRSIZ, fp) != NULL; ) {
 		rest = buf;
 		host = strsep(&rest, " \t\n");
 		dirp = strsep(&rest, " \t\n");
 		if (host == NULL || dirp == NULL)
 			continue;
 
 		entry = (struct mountlist *)malloc(sizeof (*entry));
 		if (entry == (struct mountlist *)NULL)
 			out_of_mem();
 		/* Bounded copies; terminate explicitly in case of truncation. */
 		strncpy(entry->ml_host, host, MNTNAMLEN);
 		entry->ml_host[MNTNAMLEN] = '\0';
 		strncpy(entry->ml_dirp, dirp, MNTPATHLEN);
 		entry->ml_dirp[MNTPATHLEN] = '\0';
 		entry->ml_next = (struct mountlist *)NULL;
 
 		/* Hook the entry in at the tail and move the tail on. */
 		*tailp = entry;
 		tailp = &entry->ml_next;
 	}
 	fclose(fp);
 }
 
 static void
 del_mlist(char *hostp, char *dirp)
 {
 	struct mountlist *mlp, **mlpp;
 	struct mountlist *mlp2;
 	FILE *mlfile;
 	int fnd = 0;
 
 	mlpp = &mlhead;
 	mlp = mlhead;
 	while (mlp) {
 		if (!strcmp(mlp->ml_host, hostp) &&
 		    (!dirp || !strcmp(mlp->ml_dirp, dirp))) {
 			fnd = 1;
 			mlp2 = mlp;
 			*mlpp = mlp = mlp->ml_next;
 			free((caddr_t)mlp2);
 		} else {
 			mlpp = &mlp->ml_next;
 			mlp = mlp->ml_next;
 		}
 	}
 	if (fnd) {
 		if ((mlfile = fopen(_PATH_RMOUNTLIST, "w")) == NULL) {
 			syslog(LOG_ERR,"can't update %s", _PATH_RMOUNTLIST);
 			return;
 		}
 		mlp = mlhead;
 		while (mlp) {
 			fprintf(mlfile, "%s %s\n", mlp->ml_host, mlp->ml_dirp);
 			mlp = mlp->ml_next;
 		}
 		fclose(mlfile);
 	}
 }
 
 /*
  * Record that `hostp' has mounted `dirp'.  Nothing happens if the pair is
  * already on the list; otherwise a new entry is appended to the in-memory
  * list and one line is appended to _PATH_RMOUNTLIST.
  */
 static void
 add_mlist(char *hostp, char *dirp)
 {
 	struct mountlist *cur, **tailp;
 	FILE *fp;
 
 	/* Look for a duplicate while finding the tail link. */
 	for (tailp = &mlhead; (cur = *tailp) != NULL; tailp = &cur->ml_next) {
 		if (strcmp(cur->ml_host, hostp) == 0 &&
 		    strcmp(cur->ml_dirp, dirp) == 0)
 			return;
 	}
 
 	cur = (struct mountlist *)malloc(sizeof (*cur));
 	if (cur == (struct mountlist *)NULL)
 		out_of_mem();
 	strncpy(cur->ml_host, hostp, MNTNAMLEN);
 	cur->ml_host[MNTNAMLEN] = '\0';
 	strncpy(cur->ml_dirp, dirp, MNTPATHLEN);
 	cur->ml_dirp[MNTPATHLEN] = '\0';
 	cur->ml_next = (struct mountlist *)NULL;
 	*tailp = cur;
 
 	fp = fopen(_PATH_RMOUNTLIST, "a");
 	if (fp == NULL) {
 		syslog(LOG_ERR, "can't update %s", _PATH_RMOUNTLIST);
 		return;
 	}
 	fprintf(fp, "%s %s\n", cur->ml_host, cur->ml_dirp);
 	fclose(fp);
 }
 
 /*
  * Release one grouplist entry together with what it owns: the addrinfo
  * chain of a GT_HOST entry or the network name of a GT_NET entry.
  */
 static void
 free_grp(struct grouplist *grp)
 {
 	switch (grp->gr_type) {
 	case GT_HOST:
 		if (grp->gr_ptr.gt_addrinfo != NULL)
 			freeaddrinfo(grp->gr_ptr.gt_addrinfo);
 		break;
 	case GT_NET:
 		if (grp->gr_ptr.gt_net.nt_name != NULL)
 			free(grp->gr_ptr.gt_net.nt_name);
 		break;
 	default:
 		/* Other group types own nothing besides the entry itself. */
 		break;
 	}
 	free((caddr_t)grp);
 }
 
 #ifdef DEBUG
 /*
  * Debug-only logger: formats `fmt' and its arguments to stderr.  The
  * priority argument is accepted but not used.  Presumably substituted for
  * syslog() in DEBUG builds; the mapping is not visible in this part of the
  * file.
  */
 static void
 SYSLOG(int pri, const char *fmt, ...)
 {
 	va_list ap;
 
 	va_start(ap, fmt);
 	vfprintf(stderr, fmt, ap);
 	va_end(ap);
 }
 #endif /* DEBUG */
 
 /*
  * Validate the option flags collected in opt_flags for one exports line.
  *
  * Returns 0 when the combination is acceptable and 1 otherwise; every
  * rejection except the missing-directory case is reported via syslog().
  */
 static int
 check_options(struct dirlist *dp)
 {
 
 	/* Outside of V4: handling a directory list is mandatory. */
 	if (v4root_phase == 0 && dp == NULL)
 		return (1);
 
 	if ((opt_flags & (OP_MAPROOT | OP_MAPALL)) ==
 	    (OP_MAPROOT | OP_MAPALL)) {
 		syslog(LOG_ERR, "-mapall and -maproot mutually exclusive");
 		return (1);
 	}
 
 	if ((opt_flags & OP_MASK) && !(opt_flags & OP_NET)) {
 		syslog(LOG_ERR, "-mask requires -network");
 		return (1);
 	}
 
 	if ((opt_flags & OP_NET) && !(opt_flags & OP_HAVEMASK)) {
 		syslog(LOG_ERR, "-network requires mask specification");
 		return (1);
 	}
 
 	if ((opt_flags & OP_MASK) && (opt_flags & OP_MASKLEN)) {
 		syslog(LOG_ERR, "-mask and /masklen are mutually exclusive");
 		return (1);
 	}
 
 	/* A V4: line may only carry security and network/mask options. */
 	if (v4root_phase > 0 &&
 	    (opt_flags &
 	     ~(OP_SEC | OP_MASK | OP_NET | OP_HAVEMASK | OP_MASKLEN)) != 0) {
 		syslog(LOG_ERR,"only -sec,-net,-mask options allowed on V4:");
 		return (1);
 	}
 
 	if ((opt_flags & OP_ALLDIRS) && dp->dp_left) {
 		syslog(LOG_ERR, "-alldirs has multiple directories");
 		return (1);
 	}
 
 	return (0);
 }
 
 /*
  * Check an absolute directory path for symbolic links and non-directories.
  *
  * Each leading component of `dirp', and finally `dirp' itself, is examined
  * with lstat().  Returns 1 if every one of them exists and is a directory,
  * 0 otherwise.  `dirp' is modified while it is being walked but is restored
  * before returning.
  */
 static int
 check_dirpath(char *dirp)
 {
 	struct stat st;
 	char *sep;
 	int ok;
 
 	ok = 1;
 	for (sep = dirp + 1; *sep != '\0' && ok; sep++) {
 		if (*sep != '/')
 			continue;
 		/* Cut the path here, test the prefix, then repair it. */
 		*sep = '\0';
 		ok = (lstat(dirp, &st) >= 0 && S_ISDIR(st.st_mode));
 		*sep = '/';
 	}
 
 	/* The complete path is always tested as well. */
 	if (lstat(dirp, &st) < 0 || !S_ISDIR(st.st_mode))
 		ok = 0;
 	return (ok);
 }
 
 /*
  * Build a netmask with `bitlen' leading one bits in the address part of
  * `ssp'.  The ss_family and other non-address fields must already be set,
  * because they determine where the address bytes live.
  *
  * Returns 0 on success, -1 if the family is not supported or `bitlen' is
  * larger than the address.
  */
 static int
 makemask(struct sockaddr_storage *ssp, int bitlen)
 {
 	u_char *addr;
 	int idx, nbytes, take;
 
 	addr = sa_rawaddr((struct sockaddr *)ssp, &nbytes);
 	if (addr == NULL)
 		return (-1);
 	if (bitlen > nbytes * CHAR_BIT)
 		return (-1);
 
 	/* Hand out the remaining bits one byte at a time. */
 	for (idx = 0; idx < nbytes; idx++) {
 		if (bitlen > CHAR_BIT)
 			take = CHAR_BIT;
 		else
 			take = bitlen;
 		addr[idx] = (u_char)~0 << (CHAR_BIT - take);
 		bitlen -= take;
 	}
 	return 0;
 }
 
 /*
  * Verify that the address part of `sa' is a contiguous netmask, i.e. a run
  * of one bits followed only by zero bits.
  *
  * Returns 0 if the mask is acceptable and -1 if it is not or if the
  * address family is not supported.
  */
 static int
 checkmask(struct sockaddr *sa)
 {
 	u_char *mask;
 	int idx, nbytes, inv;
 
 	mask = sa_rawaddr(sa, &nbytes);
 	if (mask == NULL)
 		return (-1);
 
 	/* Skip the bytes that are completely set. */
 	idx = 0;
 	while (idx < nbytes && mask[idx] == 0xff)
 		idx++;
 
 	/*
 	 * The first byte that is not 0xff may be partially set; its
 	 * complement must then be of the form 0..01..1.
 	 */
 	if (idx < nbytes) {
 		inv = ~mask[idx];
 		if (inv & (u_char)(inv + 1))
 			return (-1);
 		idx++;
 	}
 
 	/* Everything after that has to be clear. */
 	while (idx < nbytes) {
 		if (mask[idx] != 0)
 			return (-1);
 		idx++;
 	}
 	return (0);
 }
 
 /*
  * Compare the addresses of two sockaddrs, optionally under a netmask.
  *
  * Returns zero if `sa1' matches `sa2' when filtered by `samask'.  With a
  * NULL `samask' all address bytes are compared and the memcmp() result is
  * returned.  Differing or unsupported families, differing IPv6 scope ids
  * and a mask of another family all count as a mismatch (return 1).
  */
 static int
 sacmp(struct sockaddr *sa1, struct sockaddr *sa2, struct sockaddr *samask)
 {
 	unsigned char *a, *b, *m;
 	int nbytes, idx;
 
 	if (sa1->sa_family != sa2->sa_family)
 		return (1);
 	if ((a = sa_rawaddr(sa1, &nbytes)) == NULL)
 		return (1);
 	if ((b = sa_rawaddr(sa2, NULL)) == NULL)
 		return (1);
 
 	/* IPv6 addresses only match within the same scope. */
 	if (sa1->sa_family == AF_INET6 &&
 	    ((struct sockaddr_in6 *)sa1)->sin6_scope_id !=
 	    ((struct sockaddr_in6 *)sa2)->sin6_scope_id)
 		return (1);
 
 	/* Simple binary comparison if no mask specified. */
 	if (samask == NULL)
 		return (memcmp(a, b, nbytes));
 
 	/* Set up the mask, and do a mask-based comparison. */
 	if (sa1->sa_family != samask->sa_family)
 		return (1);
 	if ((m = sa_rawaddr(samask, NULL)) == NULL)
 		return (1);
 
 	idx = 0;
 	while (idx < nbytes) {
 		if ((a[idx] & m[idx]) != (b[idx] & m[idx]))
 			return (1);
 		idx++;
 	}
 	return (0);
 }
 
 /*
  * Return a pointer to the raw address bytes inside `sa' (sin_addr for
  * AF_INET, sin6_addr for AF_INET6).  If `nbytes' is not NULL, the length of
  * that address in bytes is stored there.  For any other family NULL is
  * returned and the stored length is 0.
  */
 static void *
 sa_rawaddr(struct sockaddr *sa, int *nbytes)
 {
 	void *addr;
 	int alen;
 
 	if (sa->sa_family == AF_INET) {
 		addr = &((struct sockaddr_in *)sa)->sin_addr;
 		alen = sizeof(((struct sockaddr_in *)sa)->sin_addr);
 	} else if (sa->sa_family == AF_INET6) {
 		addr = &((struct sockaddr_in6 *)sa)->sin6_addr;
 		alen = sizeof(((struct sockaddr_in6 *)sa)->sin6_addr);
 	} else {
 		addr = NULL;
 		alen = 0;
 	}
 
 	if (nbytes != NULL)
 		*nbytes = alen;
 	return (addr);
 }
 
 /*
  * Signal handler: only records the event by setting got_sighup; the signal
  * number is ignored.  Presumably the main loop polls the flag - that code
  * is not visible here.
  */
 static void
 huphandler(int sig __unused)
 {
 
 	got_sighup = 1;
 }
 
 /*
  * Signal handler for shutdown: removes the pid file, unregisters both
  * MOUNTPROG versions (MOUNTVERS and MOUNTVERS3) with rpcb_unset() and exits
  * with status 0.  The signal number is ignored.
  */
 static void
 terminate(int sig __unused)
 {
 	pidfile_remove(pfh);
 	rpcb_unset(MOUNTPROG, MOUNTVERS, NULL);
 	rpcb_unset(MOUNTPROG, MOUNTVERS3, NULL);
 	exit (0);
 }
Index: projects/release-pkg/usr.sbin/rpcbind/Makefile
===================================================================
--- projects/release-pkg/usr.sbin/rpcbind/Makefile	(revision 293335)
+++ projects/release-pkg/usr.sbin/rpcbind/Makefile	(revision 293336)
@@ -1,21 +1,25 @@
 #	$NetBSD: Makefile,v 1.3 2000/06/20 13:56:43 fvdl Exp $
 # $FreeBSD$
 
 .include <src.opts.mk>
 
 PROG=	rpcbind
 MAN=	rpcbind.8
 SRCS=	check_bound.c rpcb_stat.c rpcb_svc_4.c rpcbind.c pmap_svc.c \
 	rpcb_svc.c rpcb_svc_com.c security.c warmstart.c util.c
 
 CFLAGS+= -DPORTMAP -DLIBWRAP
 
 .if ${MK_INET6_SUPPORT} != "no"
 CFLAGS+= -DINET6
 .endif
 
+.if ${MK_TESTS} != "no"
+SUBDIR+=	tests
+.endif
+
 WARNS?=	1
 
 LIBADD=	wrap
 
 .include <bsd.prog.mk>
Index: projects/release-pkg/usr.sbin/rpcbind/check_bound.c
===================================================================
--- projects/release-pkg/usr.sbin/rpcbind/check_bound.c	(revision 293335)
+++ projects/release-pkg/usr.sbin/rpcbind/check_bound.c	(revision 293336)
@@ -1,230 +1,241 @@
 /*	$NetBSD: check_bound.c,v 1.2 2000/06/22 08:09:26 fvdl Exp $	*/
 /*	$FreeBSD$ */
 
 /*-
  * Copyright (c) 2009, Sun Microsystems, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  * - Redistributions of source code must retain the above copyright notice,
  *   this list of conditions and the following disclaimer.
  * - Redistributions in binary form must reproduce the above copyright notice,
  *   this list of conditions and the following disclaimer in the documentation
  *   and/or other materials provided with the distribution.
  * - Neither the name of Sun Microsystems, Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived
  *   from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 /*
  * Copyright (c) 1986 - 1991 by Sun Microsystems, Inc.
  */
 
 /* #ident	"@(#)check_bound.c	1.15	93/07/05 SMI" */
 
 #if 0
 #ifndef lint
 static	char sccsid[] = "@(#)check_bound.c 1.11 89/04/21 Copyr 1989 Sun Micro";
 #endif
 #endif
 
 /*
  * check_bound.c
  * Checks to see whether the program is still bound to the
  * claimed address and returns the universal merged address
  *
  */
 
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <rpc/rpc.h>
+#include <rpc/svc_dg.h>
 #include <stdio.h>
 #include <netconfig.h>
 #include <syslog.h>
 #include <string.h>
 #include <unistd.h>
 #include <stdlib.h>
 
 #include "rpcbind.h"
 
 /* One entry per transport registered through add_bndlist(). */
 struct fdlist {
 	int fd;				/* not referenced in this file's visible code */
 	struct netconfig *nconf;	/* private copy from getnetconfigent() */
 	struct fdlist *next;		/* next entry, NULL at the tail */
 	int check_binding;		/* FALSE: check_bound() always says TRUE */
 };
 
 static struct fdlist *fdhead;	/* Link list of the check fd's */
 static struct fdlist *fdtail;	/* last entry, for appending */
 static char *nullstring = "";	/* mergeaddr()'s "not bound" result */
 
 static bool_t check_bound(struct fdlist *, char *uaddr);
 
 /*
  * Probe whether the universal address `uaddr' is still in use on the
  * transport described by `fdl', by trying to bind a fresh socket to it.
  *
  * Returns FALSE only if that bind() succeeds, i.e. nobody holds the
  * address any more.  Returns TRUE if the bind fails and in every case
  * where the probe cannot be carried out (checking disabled for the entry,
  * address conversion failed, no socket available): the address is then
  * assumed to be bound.
  */
 static bool_t
 check_bound(struct fdlist *fdl, char *uaddr)
 {
 	struct netbuf *taddr;
 	bool_t bound;
 	int sock;
 
 	if (fdl->check_binding == FALSE)
 		return (TRUE);
 
 	taddr = uaddr2taddr(fdl->nconf, uaddr);
 	if (taddr == NULL)
 		return (TRUE); /* punt, should never happen */
 
 	bound = TRUE;
 	sock = __rpc_nconf2fd(fdl->nconf);
 	if (sock >= 0) {
 		if (bind(sock, (struct sockaddr *)taddr->buf, taddr->len) == 0)
 			bound = FALSE;
 		close(sock);
 	}
 
 	free(taddr->buf);
 	free(taddr);
 	return (bound);
 }
 
 /*
  * Append an entry for the transport `nconf' to the list consulted by
  * is_bound(), mergeaddr() and rpcbind_get_conf().  The entry keeps its own
  * netconfig copy obtained with getnetconfigent().  `baddr' is unused.
  *
  * Returns 0 on success and -1 if the netconfig lookup or the allocation
  * fails.
  */
 int
 add_bndlist(struct netconfig *nconf, struct netbuf *baddr __unused)
 {
 	struct netconfig *copy;
 	struct fdlist *node;
 
 	if ((copy = getnetconfigent(nconf->nc_netid)) == NULL)
 		return (-1);
 
 	if ((node = malloc(sizeof (*node))) == NULL) {
 		freenetconfigent(copy);
 		syslog(LOG_ERR, "no memory!");
 		return (-1);
 	}
 	node->nconf = copy;
 	node->next = NULL;
 	/* XXX no bound checking for now */
 	node->check_binding = FALSE;
 
 	/* Link the entry in at the tail. */
 	if (fdhead == NULL)
 		fdhead = node;
 	else
 		fdtail->next = node;
 	fdtail = node;
 
 	return 0;
 }
 
 /*
  * Report whether `uaddr' is considered bound on transport `netid'.  An
  * unknown transport counts as bound (TRUE); otherwise the answer comes
  * from check_bound() on the first entry with a matching nc_netid.
  */
 bool_t
 is_bound(char *netid, char *uaddr)
 {
 	struct fdlist *fdl;
 
 	for (fdl = fdhead; fdl != NULL; fdl = fdl->next) {
 		if (strcmp(fdl->nconf->nc_netid, netid) == 0)
 			return (check_bound(fdl, uaddr));
 	}
 	return (TRUE);
 }
 
 /*
  * Returns NULL if there was some system error.
  * Returns "" if the address was not bound, i.e the server crashed.
  * Returns the merged address otherwise.
+ *
+ * `xprt' is the transport the request arrived on, `netid' selects the
+ * registered transport entry, `uaddr' is the server's universal address and
+ * `saddr', if not NULL, the address by which the client says it contacted
+ * us.  The merged address is whatever addrmerge() returns.
  */
 char *
 mergeaddr(SVCXPRT *xprt, char *netid, char *uaddr, char *saddr)
 {
 	struct fdlist *fdl;
+	struct svc_dg_data *dg_data;
 	char *c_uaddr, *s_uaddr, *m_uaddr, *allocated_uaddr = NULL;
 
 	for (fdl = fdhead; fdl; fdl = fdl->next)
 		if (strcmp(fdl->nconf->nc_netid, netid) == 0)
 			break;
 	if (fdl == NULL)
 		return (NULL);
 	if (check_bound(fdl, uaddr) == FALSE)
 		/* that server died */
 		return (nullstring);
 	/*
+	 * Try to determine the local address on which the client contacted us,
+	 * so we can send a reply from the same address.  If it's unknown, then
+	 * try to determine which address the client used, and pick a nearby
+	 * local address.
+	 *
 	 * If saddr is not NULL, the remote client may have included the
 	 * address by which it contacted us.  Use that for the "client" uaddr,
 	 * otherwise use the info from the SVCXPRT.
 	 */
-	if (saddr != NULL) {
+	dg_data = (struct svc_dg_data*)xprt->xp_p2;
+	if (dg_data != NULL && dg_data->su_srcaddr.buf != NULL) {
+		c_uaddr = taddr2uaddr(fdl->nconf, &dg_data->su_srcaddr);
+		if (c_uaddr == NULL) {
+			syslog(LOG_ERR, "taddr2uaddr failed for %s",
+				fdl->nconf->nc_netid);
+			return (NULL);
+		}
+		/* taddr2uaddr() allocates; record it so it is freed below. */
+		allocated_uaddr = c_uaddr;
+	}
+	else if (saddr != NULL) {
 		c_uaddr = saddr;
 	} else {
 		c_uaddr = taddr2uaddr(fdl->nconf, svc_getrpccaller(xprt));
 		if (c_uaddr == NULL) {
 			syslog(LOG_ERR, "taddr2uaddr failed for %s",
 				fdl->nconf->nc_netid);
 			return (NULL);
 		}
 		allocated_uaddr = c_uaddr;
 	}
 
 #ifdef ND_DEBUG
 	if (debugging) {
 		if (saddr == NULL) {
 			fprintf(stderr, "mergeaddr: client uaddr = %s\n",
 			    c_uaddr);
 		} else {
 			fprintf(stderr, "mergeaddr: contact uaddr = %s\n",
 			    c_uaddr);
 		}
 	}
 #endif
 	s_uaddr = uaddr;
 	/*
 	 * This is all we should need for IP 4 and 6
 	 */
 	m_uaddr = addrmerge(svc_getrpccaller(xprt), s_uaddr, c_uaddr, netid);
 #ifdef ND_DEBUG
 	if (debugging)
 		fprintf(stderr, "mergeaddr: uaddr = %s, merged uaddr = %s\n",
 				uaddr, m_uaddr);
 #endif
 	if (allocated_uaddr != NULL)
 		free(allocated_uaddr);
 	return (m_uaddr);
 }
 
 /*
  * Returns a netconf structure from its internal list.  This
  * structure should not be freed.
  *
  * The list is the one filled by add_bndlist(); the first entry whose
  * nc_netid equals `netid' is used.  Returns NULL if no entry matches.
  */
 struct netconfig *
-rpcbind_get_conf(char *netid)
+rpcbind_get_conf(const char *netid)
 {
 	struct fdlist *fdl;
 
 	/* Linear search; fdl is NULL afterwards if nothing matched. */
 	for (fdl = fdhead; fdl; fdl = fdl->next)
 		if (strcmp(fdl->nconf->nc_netid, netid) == 0)
 			break;
 	if (fdl == NULL)
 		return (NULL);
 	return (fdl->nconf);
 }
Index: projects/release-pkg/usr.sbin/rpcbind/rpcbind.h
===================================================================
--- projects/release-pkg/usr.sbin/rpcbind/rpcbind.h	(revision 293335)
+++ projects/release-pkg/usr.sbin/rpcbind/rpcbind.h	(revision 293336)
@@ -1,155 +1,155 @@
 /*	$NetBSD: rpcbind.h,v 1.1 2000/06/03 00:47:21 fvdl Exp $	*/
 /*	$FreeBSD$ */
 
 /*-
  * Copyright (c) 2009, Sun Microsystems, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  * - Redistributions of source code must retain the above copyright notice,
  *   this list of conditions and the following disclaimer.
  * - Redistributions in binary form must reproduce the above copyright notice,
  *   this list of conditions and the following disclaimer in the documentation
  *   and/or other materials provided with the distribution.
  * - Neither the name of Sun Microsystems, Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived
  *   from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 /*
  * Copyright (c) 1986 - 1991 by Sun Microsystems, Inc.
  */
 
 /* #ident	"@(#)rpcbind.h 1.4 90/04/12 SMI" */
 
 /*
  * rpcbind.h
  * The common header declarations
  */
 
 #ifndef rpcbind_h
 #define	rpcbind_h
 
 #ifdef PORTMAP
 #include <rpc/pmap_prot.h>
 #endif
 #include <rpc/rpcb_prot.h>
 
 /*
  * Stuff for the rmtcall service
  */
 /* Opaque argument blob; presumably `args' holds `arglen' bytes - confirm. */
 struct encap_parms {
 	u_int32_t arglen;
 	char *args;
 };
 
 /*
  * Arguments of a remote call request.  NOTE(review): rmt_prog, rmt_vers and
  * rmt_proc look like the target program, version and procedure numbers;
  * their use is not visible in this header.
  */
 struct r_rmtcall_args {
 	u_int32_t  rmt_prog;
 	u_int32_t  rmt_vers;
 	u_int32_t  rmt_proc;
 	int     rmt_localvers;  /* whether to send port # or uaddr */
 	char    *rmt_uaddr;
 	struct encap_parms rmt_args;
 };
 
 extern int debugging;
 extern int doabort;
 #ifdef LIBWRAP
 extern int libwrap;
 #endif
 extern int verboselog;
 extern int insecure;
 extern int oldstyle_local;
 extern rpcblist_ptr list_rbl;	/* A list of version 3 & 4 rpcbind services */
 
 #ifdef PORTMAP
 extern struct pmaplist *list_pml; /* A list of version 2 rpcbind services */
 extern char *udptrans;		/* Name of UDP transport */
 extern char *tcptrans;		/* Name of TCP transport */
 extern char *udp_uaddr;		/* Universal UDP address */
 extern char *tcp_uaddr;		/* Universal TCP address */
 #endif
 
 int add_bndlist(struct netconfig *, struct netbuf *);
 bool_t is_bound(char *, char *);
 char *mergeaddr(SVCXPRT *, char *, char *, char *);
-struct netconfig *rpcbind_get_conf(char *);
+struct netconfig *rpcbind_get_conf(const char *);
 
 void rpcbs_init(void); 
 void rpcbs_procinfo(rpcvers_t, rpcproc_t);
 void rpcbs_set(rpcvers_t, bool_t);
 void rpcbs_unset(rpcvers_t, bool_t);
 void rpcbs_getaddr(rpcvers_t, rpcprog_t, rpcvers_t, char *, char *);
 void rpcbs_rmtcall(rpcvers_t, rpcproc_t, rpcprog_t, rpcvers_t, rpcproc_t,
 			char *, rpcblist_ptr);
 void *rpcbproc_getstat(void *, struct svc_req *, SVCXPRT *, rpcvers_t);
 
 void rpcb_service_3(struct svc_req *, SVCXPRT *);
 void rpcb_service_4(struct svc_req *, SVCXPRT *);
 
 /* Common functions shared between versions */
 void *rpcbproc_set_com(void *, struct svc_req *, SVCXPRT *, rpcvers_t);
 void *rpcbproc_unset_com(void *, struct svc_req *, SVCXPRT *, rpcvers_t);
 bool_t map_set(RPCB *, char *);
 bool_t map_unset(RPCB *, char *);
 void delete_prog(unsigned int);
 void *rpcbproc_getaddr_com(RPCB *, struct svc_req *, SVCXPRT *, rpcvers_t,
 				 rpcvers_t);
 void *rpcbproc_gettime_com(void *, struct svc_req *, SVCXPRT *,
 				rpcvers_t);
 void *rpcbproc_uaddr2taddr_com(void *, struct svc_req *,
 					     SVCXPRT *, rpcvers_t);
 void *rpcbproc_taddr2uaddr_com(void *, struct svc_req *, SVCXPRT *,
 				    rpcvers_t);
 int create_rmtcall_fd(struct netconfig *);
 void rpcbproc_callit_com(struct svc_req *, SVCXPRT *, rpcvers_t,
 			      rpcvers_t);
 void my_svc_run(void);
 
 void rpcbind_abort(void);
 void reap(int);
 void toggle_verboselog(int);
 
 int check_access(SVCXPRT *, rpcproc_t, void *, unsigned int);
 int check_callit(SVCXPRT *, struct r_rmtcall_args *, int);
 void logit(int, struct sockaddr *, rpcproc_t, rpcprog_t, const char *);
 int is_loopback(struct netbuf *);
 
 #ifdef PORTMAP
 extern void pmap_service(struct svc_req *, SVCXPRT *);
 #endif
 
 void write_warmstart(void);
 void read_warmstart(void);
 
-char *addrmerge(struct netbuf *caller, char *serv_uaddr, char *clnt_uaddr,
-		     char *netid);
+char *addrmerge(struct netbuf *caller, const char *serv_uaddr,
+		const char *clnt_uaddr, char const *netid);
 int listen_addr(const struct sockaddr *sa);
 void network_init(void);
 struct sockaddr *local_sa(int);
 
 /* For different getaddr semantics */
 #define	RPCB_ALLVERS 0
 #define	RPCB_ONEVERS 1
 
 /* To convert a struct sockaddr to IPv4 or IPv6 address */
 #define	SA2SIN(sa)	((struct sockaddr_in *)(sa))
 #define	SA2SINADDR(sa)	(SA2SIN(sa)->sin_addr)
 #ifdef INET6
 #define	SA2SIN6(sa)	((struct sockaddr_in6 *)(sa))
 #define	SA2SIN6ADDR(sa)	(SA2SIN6(sa)->sin6_addr)
 #endif
 
 #endif /* rpcbind_h */
Index: projects/release-pkg/usr.sbin/rpcbind/tests/Makefile
===================================================================
--- projects/release-pkg/usr.sbin/rpcbind/tests/Makefile	(nonexistent)
+++ projects/release-pkg/usr.sbin/rpcbind/tests/Makefile	(revision 293336)
@@ -0,0 +1,17 @@
+# $FreeBSD$
+
+.include <src.opts.mk>
+# util.c (linked into the test) and rpcbind.h are found one directory up.
+.PATH:	${.CURDIR}/..
+
+ATF_TESTS_C=	addrmerge_test
+CFLAGS+=	-I${.CURDIR}/.. -Wno-cast-qual
+SRCS.addrmerge_test=	addrmerge_test.c util.c
+# Compile the IPv6 code paths and test cases only when INET6 support is on.
+.if ${MK_INET6_SUPPORT} != "no"
+CFLAGS+= -DINET6
+.endif
+
+WARNS?=	3
+
+.include <bsd.test.mk>

Property changes on: projects/release-pkg/usr.sbin/rpcbind/tests/Makefile
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/release-pkg/usr.sbin/rpcbind/tests/addrmerge_test.c
===================================================================
--- projects/release-pkg/usr.sbin/rpcbind/tests/addrmerge_test.c	(nonexistent)
+++ projects/release-pkg/usr.sbin/rpcbind/tests/addrmerge_test.c	(revision 293336)
@@ -0,0 +1,849 @@
+/*-
+ * Copyright (c) 2014 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * $FreeBSD$
+ */
+
+#include <rpc/rpc.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include <ifaddrs.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <atf-c.h>
+
+#include "rpcbind.h"
+
+#define MAX_IFADDRS 16	/* capacity of the mock address tables below */
+
+int debugging = false;	/* presumably the flag util.c expects; TODO confirm */
+
+/* Data for mocking getifaddrs: one slot per address added by mock_ifaddr4/6 */
+struct ifaddr_storage {
+	struct ifaddrs ifaddr;		/* list node handed to the caller */
+	struct sockaddr_storage addr;	/* backs ifaddr.ifa_addr */
+	struct sockaddr_storage mask;	/* backs ifaddr.ifa_netmask */
+	struct sockaddr_storage bcast;	/* backs ifaddr.ifa_broadaddr */
+} mock_ifaddr_storage[MAX_IFADDRS];
+struct ifaddrs *mock_ifaddrs = NULL;	/* list head; NULL means no addresses */
+int ifaddr_count = 0; 
+
+/* Data for mocking listen_addr: addresses that were added with bind == true */
+int bind_address_count = 0;
+struct sockaddr* bind_addresses[MAX_IFADDRS];
+
+/* Stub library functions */
+void
+freeifaddrs(struct ifaddrs *ifp __unused)
+{
+	/* Nothing to free: the mocked list lives in static storage */
+}
+
+int
+getifaddrs(struct ifaddrs **ifap)
+{
+	*ifap = mock_ifaddrs;	/* NULL unless a mock_ifaddr helper has run */
+	return (0);		/* the stub never reports failure */
+}
+
+/*
+ * Append an IPv4 address to the list returned by the getifaddrs() stub.  When
+ * "bind" is true the address is also recorded for the listen_addr() stub.
+ */
+static void
+mock_ifaddr4(const char* name, const char* addr, const char* mask,
+    const char* bcast, unsigned int flags, bool bind)
+{
+	struct ifaddr_storage *slot = &mock_ifaddr_storage[ifaddr_count];
+	struct ifaddrs *ifaddr = &slot->ifaddr;
+	struct sockaddr_in *in = (struct sockaddr_in*)&slot->addr;
+	struct sockaddr_in *mask_in = (struct sockaddr_in*)&slot->mask;
+	struct sockaddr_in *bcast_in = (struct sockaddr_in*)&slot->bcast;
+
+	in->sin_family = AF_INET;
+	in->sin_port = 0;
+	in->sin_len = sizeof(*in);	/* size of the struct, not the pointer */
+	in->sin_addr.s_addr = inet_addr(addr);
+	mask_in->sin_family = AF_INET;
+	mask_in->sin_port = 0;
+	mask_in->sin_len = sizeof(*mask_in);
+	mask_in->sin_addr.s_addr = inet_addr(mask);
+	bcast_in->sin_family = AF_INET;
+	bcast_in->sin_port = 0;
+	bcast_in->sin_len = sizeof(*bcast_in);
+	bcast_in->sin_addr.s_addr = inet_addr(bcast);
+	*ifaddr = (struct ifaddrs) {
+		.ifa_next = NULL,
+		.ifa_name = (char*) name,
+		.ifa_flags = flags,
+		.ifa_addr = (struct sockaddr*) in,
+		.ifa_netmask = (struct sockaddr*) mask_in,
+		.ifa_broadaddr = (struct sockaddr*) bcast_in,
+		.ifa_data = NULL,	/* addrmerge doesn't care*/
+	};
+
+	if (ifaddr_count > 0)
+		mock_ifaddr_storage[ifaddr_count - 1].ifaddr.ifa_next = ifaddr;
+	ifaddr_count++;
+	mock_ifaddrs = &mock_ifaddr_storage[0].ifaddr;
+
+	/* Optionally simulate binding an ip ala "rpcbind -h foo" */
+	if (bind)
+		bind_addresses[bind_address_count++] = (struct sockaddr*)in;
+}
+
+/*
+ * Append an IPv6 address to the list returned by the getifaddrs() stub,
+ * optionally recording it for the listen_addr() stub.  No-op without INET6.
+ */
+#ifdef INET6
+static void
+mock_ifaddr6(const char* name, const char* addr, const char* mask,
+    const char* bcast, unsigned int flags, uint32_t scope_id, bool bind)
+{
+	struct ifaddr_storage *slot = &mock_ifaddr_storage[ifaddr_count];
+	struct ifaddrs *ifaddr = &slot->ifaddr;
+	struct sockaddr_in6 *in6 = (struct sockaddr_in6*)&slot->addr;
+	struct sockaddr_in6 *mask_in6 = (struct sockaddr_in6*)&slot->mask;
+	struct sockaddr_in6 *bcast_in6 = (struct sockaddr_in6*)&slot->bcast;
+
+	in6->sin6_family = AF_INET6;
+	in6->sin6_port = 0;
+	in6->sin6_len = sizeof(*in6);
+	in6->sin6_scope_id = scope_id;
+	ATF_REQUIRE_EQ(1, inet_pton(AF_INET6, addr, (void*)&in6->sin6_addr));
+	mask_in6->sin6_family = AF_INET6;
+	mask_in6->sin6_port = 0;
+	mask_in6->sin6_len = sizeof(*mask_in6);
+	mask_in6->sin6_scope_id = scope_id;
+	ATF_REQUIRE_EQ(1, inet_pton(AF_INET6, mask,
+	    (void*)&mask_in6->sin6_addr));
+	bcast_in6->sin6_family = AF_INET6;
+	bcast_in6->sin6_port = 0;
+	bcast_in6->sin6_len = sizeof(*bcast_in6);
+	bcast_in6->sin6_scope_id = scope_id;
+	ATF_REQUIRE_EQ(1, inet_pton(AF_INET6, bcast,
+	    (void*)&bcast_in6->sin6_addr));
+	*ifaddr = (struct ifaddrs) {
+		.ifa_next = NULL,
+		.ifa_name = (char*) name,
+		.ifa_flags = flags,
+		.ifa_addr = (struct sockaddr*) in6,
+		.ifa_netmask = (struct sockaddr*) mask_in6,
+		.ifa_broadaddr = (struct sockaddr*) bcast_in6,
+		.ifa_data = NULL,	/* addrmerge doesn't care*/
+	};
+
+	if (ifaddr_count > 0)
+		mock_ifaddr_storage[ifaddr_count - 1].ifaddr.ifa_next = ifaddr;
+	ifaddr_count++;
+	mock_ifaddrs = &mock_ifaddr_storage[0].ifaddr;
+
+	/* Optionally simulate binding an ip ala "rpcbind -h foo" */
+	if (bind)
+		bind_addresses[bind_address_count++] = (struct sockaddr*)in6;
+}
+#else
+static void
+mock_ifaddr6(const char* name __unused, const char* addr __unused,
+    const char* mask __unused, const char* bcast __unused,
+    unsigned int flags __unused, uint32_t scope_id __unused, bool bind __unused)
+{
+}
+#endif /*INET6 */
+
+static void
+mock_lo0(void)
+{
+	/*
+	 * Mock lo0.  The broadcast address looks wrong, but it is what
+	 * getifaddrs(3) actually returns; it is invalid without IFF_BROADCAST.
+	 */
+	mock_ifaddr4("lo0", "127.0.0.1", "255.0.0.0", "127.0.0.1",
+	    IFF_LOOPBACK | IFF_UP | IFF_RUNNING | IFF_MULTICAST, false);
+	mock_ifaddr6("lo0", "::1", "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
+	    "::1",
+	    IFF_LOOPBACK | IFF_UP | IFF_RUNNING | IFF_MULTICAST, 0, false);
+}
+
+/* igb0 sits on the same IPv4 and IPv6 subnets as the mocked callers */
+static void
+mock_igb0(void)
+{
+	const unsigned int flags = IFF_UP | IFF_BROADCAST | IFF_RUNNING |
+	    IFF_SIMPLEX | IFF_MULTICAST;
+
+	/* Global addresses */
+	mock_ifaddr4("igb0", "192.0.2.2", "255.255.255.128", "192.0.2.127",
+	    flags, false);
+	mock_ifaddr6("igb0", "2001:db8::2", "ffff:ffff:ffff:ffff::",
+	    "2001:db8::ffff:ffff:ffff:ffff", flags, 0, false);
+	/* Link local address; do_addrmerge6_ll's caller also uses scope 2 */
+	mock_ifaddr6("igb0", "fe80::2", "ffff:ffff:ffff:ffff::",
+	    "fe80::ffff:ffff:ffff:ffff", flags, 2, false);
+}
+
+/* On the same subnet as igb0 */
+static void
+mock_igb1(bool bind)
+{
+	const unsigned int flags = IFF_UP | IFF_BROADCAST | IFF_RUNNING |
+	    IFF_SIMPLEX | IFF_MULTICAST;
+
+	/* Every igb1 address is marked as bound when "bind" is true */
+	mock_ifaddr4("igb1", "192.0.2.3", "255.255.255.128", "192.0.2.127",
+	    flags, bind);
+	/* Same IPv6 prefix as igb0 */
+	mock_ifaddr6("igb1", "2001:db8::3", "ffff:ffff:ffff:ffff::",
+	    "2001:db8::ffff:ffff:ffff:ffff", flags, 0, bind);
+	/* Link local address, in a different scope (3) than igb0's (2) */
+	mock_ifaddr6("igb1", "fe80::3", "ffff:ffff:ffff:ffff::",
+	    "fe80::ffff:ffff:ffff:ffff", flags, 3, bind);
+}
+
+/* igb2 is on a different subnet than igb0 */
+static void
+mock_igb2(void)
+{
+	const unsigned int flags = IFF_UP | IFF_BROADCAST | IFF_RUNNING |
+	    IFF_SIMPLEX | IFF_MULTICAST;
+
+	mock_ifaddr4("igb2", "192.0.2.130", "255.255.255.128", "192.0.2.255",
+	    flags, false);
+	mock_ifaddr6("igb2", "2001:db8:1::2", "ffff:ffff:ffff:ffff::",
+	    "2001:db8:1:0:ffff:ffff:ffff:ffff", flags, 0, false);
+}
+
+/* tun0 is a P2P interface (IFF_POINTOPOINT) with all-ones netmasks */
+static void
+mock_tun0(void)
+{
+	mock_ifaddr4("tun0", "192.0.2.5", "255.255.255.255", "192.0.2.6",
+	    IFF_UP | IFF_RUNNING | IFF_POINTOPOINT | IFF_MULTICAST, false);
+	mock_ifaddr6("tun0", "2001:db8::5",
+	    "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
+	    "2001:db8::6",
+	    IFF_UP | IFF_RUNNING | IFF_POINTOPOINT | IFF_MULTICAST, 0, false);
+}
+
+
+/* Stub rpcbind functions */
+/* Stub: accept sa if nothing is "bound", else only exact bound addresses */
+int
+listen_addr(const struct sockaddr *sa)
+{
+	int i;
+
+	if (bind_address_count == 0)
+		return (1);
+
+	for (i = 0; i < bind_address_count; i++) {
+		if (bind_addresses[i]->sa_family != sa->sa_family)
+			continue;
+		/* Compare from the start of sa so that sa_len bounds the read */
+		if (0 == memcmp(bind_addresses[i], sa, sa->sa_len))
+			return (1);
+	}
+	return (0);
+}
+
+/* Stub: return a canned netconfig entry for "udp" (or "udp6" with INET6) */
+struct netconfig*
+rpcbind_get_conf(const char* netid)
+{
+	/* Use static variables so we can return pointers to them */
+	static char* lookups = NULL;
+	static struct netconfig nconf_udp;
+#ifdef INET6
+	static struct netconfig nconf_udp6;
+#endif /* INET6 */
+
+	nconf_udp.nc_netid = (char*)"udp";
+	nconf_udp.nc_semantics = NC_TPI_CLTS;
+	nconf_udp.nc_flag = NC_VISIBLE;
+	nconf_udp.nc_protofmly = (char*)"inet";
+	nconf_udp.nc_proto = (char*)"udp";
+	nconf_udp.nc_device = (char*)"-";
+	nconf_udp.nc_nlookups = 0;
+	nconf_udp.nc_lookups = &lookups;
+
+#ifdef INET6
+	nconf_udp6.nc_netid = (char*)"udp6";
+	nconf_udp6.nc_semantics = NC_TPI_CLTS;
+	nconf_udp6.nc_flag = NC_VISIBLE;
+	nconf_udp6.nc_protofmly = (char*)"inet6";
+	nconf_udp6.nc_proto = (char*)"udp6";
+	nconf_udp6.nc_device = (char*)"-";
+	nconf_udp6.nc_nlookups = 0;
+	nconf_udp6.nc_lookups = &lookups;
+#endif /* INET6 */
+
+	if (0 == strncmp("udp", netid, sizeof("udp")))
+		return (&nconf_udp);
+#ifdef INET6
+	if (0 == strncmp("udp6", netid, sizeof("udp6")))
+		return (&nconf_udp6);
+#endif /* INET6 */
+	return (NULL);		/* any other netid is unknown to the stub */
+}
+
+/*
+ * Helper function used by most test cases
+ * param recvdstaddr	If non-null, the uaddr on which the request was received
+ */
+static char*
+do_addrmerge4(const char* recvdstaddr)
+{
+	struct netbuf caller;
+	/* Zero-fill first so that sin_zero never carries stack garbage */
+	struct sockaddr_in caller_in = { .sin_family = AF_INET };
+	const char *serv_uaddr, *clnt_uaddr, *netid;
+
+	/* caller contains the client's IP address */
+	caller.maxlen = sizeof(struct sockaddr_storage);
+	caller.len = sizeof(caller_in);
+	caller_in.sin_len = sizeof(caller_in);
+	caller_in.sin_port = 1234;
+	caller_in.sin_addr.s_addr = inet_addr("192.0.2.1");
+	caller.buf = (void*)&caller_in;
+	if (recvdstaddr != NULL)
+		clnt_uaddr = recvdstaddr;
+	else
+		clnt_uaddr = "192.0.2.1.3.46";
+
+	/* assume server is bound to INADDR_ANY, port 814 (3 * 256 + 46) */
+	serv_uaddr = "0.0.0.0.3.46";
+
+	netid = "udp";
+	return (addrmerge(&caller, serv_uaddr, clnt_uaddr, netid));
+}
+
+#ifdef INET6
+/*
+ * Variant of do_addrmerge4 where the caller has an IPv6 address
+ * param recvdstaddr	If non-null, the uaddr on which the request was received
+ */
+static char*
+do_addrmerge6(const char* recvdstaddr)
+{
+	struct netbuf caller;
+	/* Zero-fill so sin6_scope_id and sin6_flowinfo are 0, not garbage */
+	struct sockaddr_in6 caller_in6 = { .sin6_family = AF_INET6 };
+	const char *serv_uaddr, *clnt_uaddr, *netid;
+
+	/* caller contains the client's IP address */
+	caller.maxlen = sizeof(struct sockaddr_storage);
+	caller.len = sizeof(caller_in6);
+	caller_in6.sin6_len = sizeof(caller_in6);
+	caller_in6.sin6_port = 1234;
+	ATF_REQUIRE_EQ(1, inet_pton(AF_INET6, "2001:db8::1",
+	    (void*)&caller_in6.sin6_addr));
+	caller.buf = (void*)&caller_in6;
+	if (recvdstaddr != NULL)
+		clnt_uaddr = recvdstaddr;
+	else
+		clnt_uaddr = "2001:db8::1.3.46";
+
+	/* port 814.  NOTE(review): "::1" is loopback, not "::" -- confirm */
+	serv_uaddr = "::1.3.46";
+
+	netid = "udp6";
+	return (addrmerge(&caller, serv_uaddr, clnt_uaddr, netid));
+}
+
+/* Variant of do_addrmerge6 where the caller uses a link local address */
+static char*
+do_addrmerge6_ll(void)
+{
+	struct netbuf caller;
+	/* Zero-fill so that sin6_flowinfo is 0 rather than stack garbage */
+	struct sockaddr_in6 caller_in6 = { .sin6_family = AF_INET6 };
+	const char *serv_uaddr, *clnt_uaddr, *netid;
+
+	/* caller contains the client's IP address */
+	caller.maxlen = sizeof(struct sockaddr_storage);
+	caller.len = sizeof(caller_in6);
+	caller_in6.sin6_len = sizeof(caller_in6);
+	caller_in6.sin6_port = 1234;
+	caller_in6.sin6_scope_id = 2; /* same as igb0 */
+	ATF_REQUIRE_EQ(1, inet_pton(AF_INET6, "fe80::beef",
+	    (void*)&caller_in6.sin6_addr));
+	caller.buf = (void*)&caller_in6;
+	clnt_uaddr = "fe80::beef.3.46";
+
+	/* port 814.  NOTE(review): "::1" is loopback, not "::" -- confirm */
+	serv_uaddr = "::1.3.46";
+
+	netid = "udp6";
+	return (addrmerge(&caller, serv_uaddr, clnt_uaddr, netid));
+}
+#endif /* INET6 */
+
+ATF_TC_WITHOUT_HEAD(addrmerge_noifaddrs);
+ATF_TC_BODY(addrmerge_noifaddrs, tc)
+{
+	char* maddr;
+
+	maddr = do_addrmerge4(NULL);
+
+	/* Nothing was mocked, so getifaddrs yields NULL and so must addrmerge */
+	ATF_CHECK_EQ(NULL, maddr);
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_localhost_only);
+ATF_TC_BODY(addrmerge_localhost_only, tc)
+{
+	char *maddr;
+	
+	/* getifaddrs will return only the loopback interface */
+	mock_lo0();
+
+	maddr = do_addrmerge4(NULL);
+
+	/* With nothing better available, loopback must be returned */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("127.0.0.1.3.46", maddr);
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_singlehomed);
+ATF_TC_BODY(addrmerge_singlehomed, tc)
+{
+	char *maddr;
+	
+	/* getifaddrs will return loopback and one public interface (igb0) */
+	mock_lo0();
+	mock_igb0();
+
+	maddr = do_addrmerge4(NULL);
+
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("192.0.2.2.3.46", maddr);	/* igb0, not lo0 */
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_one_addr_on_each_subnet);
+ATF_TC_BODY(addrmerge_one_addr_on_each_subnet, tc)
+{
+	char *maddr;
+	
+	mock_lo0();
+	mock_igb0();
+	mock_igb2();
+
+	maddr = do_addrmerge4(NULL);
+
+	/* We must return igb0's address, which is on the caller's subnet */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("192.0.2.2.3.46", maddr);
+}
+
+
+/*
+ * Like addrmerge_one_addr_on_each_subnet, but getifaddrs returns a different
+ * order
+ */
+ATF_TC_WITHOUT_HEAD(addrmerge_one_addr_on_each_subnet_rev);
+ATF_TC_BODY(addrmerge_one_addr_on_each_subnet_rev, tc)
+{
+	char *maddr;
+	
+	/* getifaddrs will return one public address on each of two subnets */
+	mock_igb2();
+	mock_igb0();
+	mock_lo0();
+
+	maddr = do_addrmerge4(NULL);
+
+	/* We must return igb0's address, which is on the caller's subnet */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("192.0.2.2.3.46", maddr);
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_point2point);
+ATF_TC_BODY(addrmerge_point2point, tc)
+{
+	char *maddr;
+	
+	/* getifaddrs will return one normal (igb2) and one p2p (tun0) address */
+	mock_lo0();
+	mock_igb2();
+	mock_tun0();
+
+	maddr = do_addrmerge4(NULL);
+
+	/* addrmerge should prefer igb2 over the point-to-point interface */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("192.0.2.130.3.46", maddr);
+}
+
+/* Like addrmerge_point2point, but getifaddrs returns a different order */
+ATF_TC_WITHOUT_HEAD(addrmerge_point2point_rev);
+ATF_TC_BODY(addrmerge_point2point_rev, tc)
+{
+	char *maddr;
+	
+	/* getifaddrs will return one p2p (tun0) and one normal (igb2) address */
+	mock_tun0();
+	mock_igb2();
+	mock_lo0();
+
+	maddr = do_addrmerge4(NULL);
+
+	/* addrmerge should prefer igb2 over the point-to-point interface */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("192.0.2.130.3.46", maddr);
+}
+
+/*
+ * Simulate using rpcbind -h to select just one ip when the subnet has
+ * multiple
+ */
+ATF_TC_WITHOUT_HEAD(addrmerge_bindip);
+ATF_TC_BODY(addrmerge_bindip, tc)
+{
+	char *maddr;
+	
+	/* Two public addresses share one subnet; only igb1's is bound */
+	mock_lo0();
+	mock_igb0();
+	mock_igb1(true);
+
+	maddr = do_addrmerge4(NULL);
+
+	/* We must return the address to which we are bound */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("192.0.2.3.3.46", maddr);
+}
+
+/* Like addrmerge_bindip, but getifaddrs returns a different order */
+ATF_TC_WITHOUT_HEAD(addrmerge_bindip_rev);
+ATF_TC_BODY(addrmerge_bindip_rev, tc)
+{
+	char *maddr;
+	
+	/* Two public addresses share one subnet; only igb1's is bound */
+	mock_igb1(true);
+	mock_igb0();
+	mock_lo0();
+
+	maddr = do_addrmerge4(NULL);
+
+	/* We must return the address to which we are bound */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("192.0.2.3.3.46", maddr);
+}
+
+/* 
+ * The address on which the request was received is known, and is provided as
+ * the hint.
+ */
+ATF_TC_WITHOUT_HEAD(addrmerge_recvdstaddr);
+ATF_TC_BODY(addrmerge_recvdstaddr, tc)
+{
+	char *maddr;
+	
+	mock_lo0();
+	mock_igb0();
+	mock_igb1(false);
+
+	maddr = do_addrmerge4("192.0.2.2.3.46");
+
+	/* We must return the address on which the request arrived (igb0's) */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("192.0.2.2.3.46", maddr);
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_recvdstaddr_rev);
+ATF_TC_BODY(addrmerge_recvdstaddr_rev, tc)
+{
+	char *maddr;
+	
+	mock_igb1(false);
+	mock_igb0();
+	mock_lo0();
+
+	maddr = do_addrmerge4("192.0.2.2.3.46");
+
+	/* We must return the address on which the request arrived (igb0's) */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("192.0.2.2.3.46", maddr);
+}
+
+#ifdef INET6
+ATF_TC_WITHOUT_HEAD(addrmerge_localhost_only6);
+ATF_TC_BODY(addrmerge_localhost_only6, tc)
+{
+	char *maddr;
+	
+	/* getifaddrs will return only the loopback interface */
+	mock_lo0();
+
+	maddr = do_addrmerge6(NULL);
+
+	/* With nothing better available, loopback must be returned */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("::1.3.46", maddr);
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_singlehomed6);
+ATF_TC_BODY(addrmerge_singlehomed6, tc)
+{
+	char *maddr;
+	
+	/* getifaddrs will return loopback and one public interface (igb0) */
+	mock_lo0();
+	mock_igb0();
+
+	maddr = do_addrmerge6(NULL);
+
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("2001:db8::2.3.46", maddr);	/* igb0, not lo0 */
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_one_addr_on_each_subnet6);
+ATF_TC_BODY(addrmerge_one_addr_on_each_subnet6, tc)
+{
+	char *maddr;
+	
+	mock_lo0();
+	mock_igb0();
+	mock_igb2();
+
+	maddr = do_addrmerge6(NULL);
+
+	/* We must return igb0's address, which is on the caller's subnet */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("2001:db8::2.3.46", maddr);
+}
+
+
+/*
+ * Like addrmerge_one_addr_on_each_subnet6, but getifaddrs returns a different
+ * order
+ */
+ATF_TC_WITHOUT_HEAD(addrmerge_one_addr_on_each_subnet6_rev);
+ATF_TC_BODY(addrmerge_one_addr_on_each_subnet6_rev, tc)
+{
+	char *maddr;
+	
+	/* getifaddrs will return one public address on each of two subnets */
+	mock_igb2();
+	mock_igb0();
+	mock_lo0();
+
+	maddr = do_addrmerge6(NULL);
+
+	/* We must return igb0's address, which is on the caller's subnet */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("2001:db8::2.3.46", maddr);
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_point2point6);
+ATF_TC_BODY(addrmerge_point2point6, tc)
+{
+	char *maddr;
+	
+	/* getifaddrs will return one normal (igb2) and one p2p (tun0) address */
+	mock_lo0();
+	mock_igb2();
+	mock_tun0();
+
+	maddr = do_addrmerge6(NULL);
+
+	/* addrmerge should prefer igb2 over the point-to-point interface */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("2001:db8:1::2.3.46", maddr);
+}
+
+/* Like addrmerge_point2point6, but getifaddrs returns a different order */
+ATF_TC_WITHOUT_HEAD(addrmerge_point2point6_rev);
+ATF_TC_BODY(addrmerge_point2point6_rev, tc)
+{
+	char *maddr;
+	
+	/* getifaddrs will return one p2p (tun0) and one normal (igb2) address */
+	mock_tun0();
+	mock_igb2();
+	mock_lo0();
+
+	maddr = do_addrmerge6(NULL);
+
+	/* addrmerge should prefer igb2 over the point-to-point interface */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("2001:db8:1::2.3.46", maddr);
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_bindip6);
+ATF_TC_BODY(addrmerge_bindip6, tc)
+{
+	char *maddr;
+	
+	/* Two public addresses share one subnet; only igb1's are bound */
+	mock_lo0();
+	mock_igb0();
+	mock_igb1(true);
+
+	maddr = do_addrmerge6(NULL);
+
+	/* We must return the address to which we are bound */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("2001:db8::3.3.46", maddr);
+}
+
+/* Like addrmerge_bindip6, but getifaddrs returns a different order */
+ATF_TC_WITHOUT_HEAD(addrmerge_bindip6_rev);
+ATF_TC_BODY(addrmerge_bindip6_rev, tc)
+{
+	char *maddr;
+	
+	/* Two public addresses share one subnet; only igb1's are bound */
+	mock_igb1(true);
+	mock_igb0();
+	mock_lo0();
+
+	maddr = do_addrmerge6(NULL);
+
+	/* We must return the address to which we are bound */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("2001:db8::3.3.46", maddr);
+}
+
+/* 
+ * IPv6 Link Local addresses with the same scope id as the caller, if the caller
+ * is also a link local address, should be preferred
+ */
+ATF_TC_WITHOUT_HEAD(addrmerge_ipv6_linklocal);
+ATF_TC_BODY(addrmerge_ipv6_linklocal, tc)
+{
+	char *maddr;
+	
+	/*
+	 * getifaddrs will return two link local addresses with the same netmask
+	 * and prefix but different scope IDs
+	 */
+	mock_igb1(false);
+	mock_igb0();
+	mock_lo0();
+
+	maddr = do_addrmerge6_ll();
+
+	/* We must return the link local address in the caller's scope (igb0) */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("fe80::2.3.46", maddr);
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_ipv6_linklocal_rev);
+ATF_TC_BODY(addrmerge_ipv6_linklocal_rev, tc)
+{
+	char *maddr;
+	
+	/*
+	 * getifaddrs will return two link local addresses with the same netmask
+	 * and prefix but different scope IDs
+	 */
+	mock_lo0();
+	mock_igb0();
+	mock_igb1(false);
+
+	maddr = do_addrmerge6_ll();
+
+	/* We must return the link local address in the caller's scope (igb0) */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("fe80::2.3.46", maddr);
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_recvdstaddr6);
+ATF_TC_BODY(addrmerge_recvdstaddr6, tc)
+{
+	char *maddr;
+	
+	mock_lo0();
+	mock_igb0();
+	mock_igb1(false);
+
+	maddr = do_addrmerge6("2001:db8::2.3.46");
+
+	/* We must return the address on which the request arrived (igb0's) */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("2001:db8::2.3.46", maddr);
+}
+
+ATF_TC_WITHOUT_HEAD(addrmerge_recvdstaddr6_rev);
+ATF_TC_BODY(addrmerge_recvdstaddr6_rev, tc)
+{
+	char *maddr;
+	
+	mock_igb1(false);
+	mock_igb0();
+	mock_lo0();
+
+	maddr = do_addrmerge6("2001:db8::2.3.46");
+
+	/* We must return the address on which the request arrived (igb0's) */
+	ATF_REQUIRE(maddr != NULL);
+	ATF_CHECK_STREQ("2001:db8::2.3.46", maddr);
+}
+#endif /* INET6 */
+
+
+ATF_TP_ADD_TCS(tp)
+{
+	ATF_TP_ADD_TC(tp, addrmerge_noifaddrs);
+	ATF_TP_ADD_TC(tp, addrmerge_localhost_only);
+	ATF_TP_ADD_TC(tp, addrmerge_singlehomed);
+	ATF_TP_ADD_TC(tp, addrmerge_one_addr_on_each_subnet);
+	ATF_TP_ADD_TC(tp, addrmerge_one_addr_on_each_subnet_rev);
+	ATF_TP_ADD_TC(tp, addrmerge_point2point);
+	ATF_TP_ADD_TC(tp, addrmerge_point2point_rev);
+	ATF_TP_ADD_TC(tp, addrmerge_bindip);
+	ATF_TP_ADD_TC(tp, addrmerge_bindip_rev);
+	ATF_TP_ADD_TC(tp, addrmerge_recvdstaddr);
+	ATF_TP_ADD_TC(tp, addrmerge_recvdstaddr_rev);
+#ifdef INET6
+	ATF_TP_ADD_TC(tp, addrmerge_localhost_only6);
+	ATF_TP_ADD_TC(tp, addrmerge_singlehomed6);
+	ATF_TP_ADD_TC(tp, addrmerge_one_addr_on_each_subnet6);
+	ATF_TP_ADD_TC(tp, addrmerge_one_addr_on_each_subnet6_rev);
+	ATF_TP_ADD_TC(tp, addrmerge_point2point6);
+	ATF_TP_ADD_TC(tp, addrmerge_point2point6_rev);
+	ATF_TP_ADD_TC(tp, addrmerge_bindip6);
+	ATF_TP_ADD_TC(tp, addrmerge_bindip6_rev);
+	ATF_TP_ADD_TC(tp, addrmerge_ipv6_linklocal);
+	ATF_TP_ADD_TC(tp, addrmerge_ipv6_linklocal_rev);
+	ATF_TP_ADD_TC(tp, addrmerge_recvdstaddr6);
+	ATF_TP_ADD_TC(tp, addrmerge_recvdstaddr6_rev);
+#endif /* INET6 */
+
+	return (atf_no_error());
+}

Property changes on: projects/release-pkg/usr.sbin/rpcbind/tests/addrmerge_test.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Index: projects/release-pkg/usr.sbin/rpcbind/util.c
===================================================================
--- projects/release-pkg/usr.sbin/rpcbind/util.c	(revision 293335)
+++ projects/release-pkg/usr.sbin/rpcbind/util.c	(revision 293336)
@@ -1,352 +1,401 @@
 /*
  * $NetBSD: util.c,v 1.4 2000/08/03 00:04:30 fvdl Exp $
  * $FreeBSD$
  */
 
 /*-
  * Copyright (c) 2000 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Frank van der Linden.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/queue.h>
 #include <net/if.h>
 #include <netinet/in.h>
 #include <ifaddrs.h>
 #include <sys/poll.h>
 #include <rpc/rpc.h>
 #include <errno.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <netdb.h>
 #include <netconfig.h>
 #include <stdio.h>
 #include <arpa/inet.h>
 
 #include "rpcbind.h"
 
 static struct sockaddr_in *local_in4;
 #ifdef INET6
 static struct sockaddr_in6 *local_in6;
 #endif
 
-static int bitmaskcmp(void *, void *, void *, int);
+static int bitmaskcmp(struct sockaddr *, struct sockaddr *, struct sockaddr *);
 
 /*
  * For all bits set in "mask", compare the corresponding bits in
  * "dst" and "src", and see if they match. Returns 0 if the addresses
  * match.
  */
 static int
-bitmaskcmp(void *dst, void *src, void *mask, int bytelen)
+bitmaskcmp(struct sockaddr *dst, struct sockaddr *src, struct sockaddr *mask)
 {
 	int i;
-	u_int8_t *p1 = dst, *p2 = src, *netmask = mask;
+	u_int8_t *p1, *p2, *netmask;
+	int bytelen;
 
+	if (dst->sa_family != src->sa_family ||
+	    dst->sa_family != mask->sa_family)
+		return (1);
+
+	switch (dst->sa_family) {
+	case AF_INET:
+		p1 = (uint8_t*) &SA2SINADDR(dst);
+		p2 = (uint8_t*) &SA2SINADDR(src);
+		netmask = (uint8_t*) &SA2SINADDR(mask);
+		bytelen = sizeof(struct in_addr);
+		break;
+#ifdef INET6
+	case AF_INET6:
+		p1 = (uint8_t*) &SA2SIN6ADDR(dst);
+		p2 = (uint8_t*) &SA2SIN6ADDR(src);
+		netmask = (uint8_t*) &SA2SIN6ADDR(mask);
+		bytelen = sizeof(struct in6_addr);
+		break;
+#endif
+	default:
+		return (1);
+	}
+
 	for (i = 0; i < bytelen; i++)
 		if ((p1[i] & netmask[i]) != (p2[i] & netmask[i]))
 			return (1);
 	return (0);
 }
 
 /*
  * Find a server address that can be used by `caller' to contact
  * the local service specified by `serv_uaddr'. If `clnt_uaddr' is
  * non-NULL, it is used instead of `caller' as a hint suggesting
  * the best address (e.g. the `r_addr' field of an rpc, which
  * contains the rpcbind server address that the caller used).
  *
  * Returns the best server address as a malloc'd "universal address"
  * string which should be freed by the caller. On error, returns NULL.
  */
 char *
-addrmerge(struct netbuf *caller, char *serv_uaddr, char *clnt_uaddr,
-	  char *netid)
+addrmerge(struct netbuf *caller, const char *serv_uaddr, const char *clnt_uaddr,
+	  const char *netid)
 {
 	struct ifaddrs *ifap, *ifp = NULL, *bestif;
 	struct netbuf *serv_nbp = NULL, *hint_nbp = NULL, tbuf;
 	struct sockaddr *caller_sa, *hint_sa, *ifsa, *ifmasksa, *serv_sa;
 	struct sockaddr_storage ss;
 	struct netconfig *nconf;
-	char *caller_uaddr = NULL, *hint_uaddr = NULL;
+	char *caller_uaddr = NULL;
+	const char *hint_uaddr = NULL;
 	char *ret = NULL;
+	int bestif_goodness;
 
 #ifdef ND_DEBUG
 	if (debugging)
 		fprintf(stderr, "addrmerge(caller, %s, %s, %s\n", serv_uaddr,
 		    clnt_uaddr == NULL ? "NULL" : clnt_uaddr, netid);
 #endif
 	caller_sa = caller->buf;
 	if ((nconf = rpcbind_get_conf(netid)) == NULL)
 		goto freeit;
 	if ((caller_uaddr = taddr2uaddr(nconf, caller)) == NULL)
 		goto freeit;
 
 	/*
 	 * Use `clnt_uaddr' as the hint if non-NULL, but ignore it if its
 	 * address family is different from that of the caller.
 	 */
 	hint_sa = NULL;
 	if (clnt_uaddr != NULL) {
 		hint_uaddr = clnt_uaddr;
 		if ((hint_nbp = uaddr2taddr(nconf, clnt_uaddr)) == NULL)
 			goto freeit;
 		hint_sa = hint_nbp->buf;
 	}
 	if (hint_sa == NULL || hint_sa->sa_family != caller_sa->sa_family) {
 		hint_uaddr = caller_uaddr;
 		hint_sa = caller->buf;
 	}
 
 #ifdef ND_DEBUG
 	if (debugging)
 		fprintf(stderr, "addrmerge: hint %s\n", hint_uaddr);
 #endif
 	/* Local caller, just return the server address. */
 	if (strncmp(caller_uaddr, "0.0.0.0.", 8) == 0 ||
 	    strncmp(caller_uaddr, "::.", 3) == 0 || caller_uaddr[0] == '/') {
 		ret = strdup(serv_uaddr);
 		goto freeit;
 	}
 
 	if (getifaddrs(&ifp) < 0)
 		goto freeit;
 
 	/*
-	 * Loop through all interfaces. For each interface, see if it
-	 * is either the loopback interface (which we always listen
-	 * on) or is one of the addresses the program bound to (the
-	 * wildcard by default, or a subset if -h is specified) and
-	 * the network portion of its address is equal to that of the
-	 * client.  If so, we have found the interface that we want to
-	 * use.
+	 * Loop through all interface addresses.  We are listening on an address
+	 * if any of the following are true:
+	 * a) It's a loopback address
+	 * b) It was specified with the -h command line option
+	 * c) There were no -h command line options.
+	 *
+	 * Among addresses on which we are listening, choose in order of
+	 * preference an address that is:
+	 *
+	 * a) Equal to the hint
+	 * b) A link local address with the same scope ID as the client's
+	 *    address, if the client's address is also link local
+	 * c) An address on the same subnet as the client's address
+	 * d) A non-localhost, non-p2p address
+	 * e) Any usable address
 	 */
 	bestif = NULL;
+	bestif_goodness = 0;
 	for (ifap = ifp; ifap != NULL; ifap = ifap->ifa_next) {
 		ifsa = ifap->ifa_addr;
 		ifmasksa = ifap->ifa_netmask;
 
+		/* Skip addresses where we don't listen */
 		if (ifsa == NULL || ifsa->sa_family != hint_sa->sa_family ||
 		    !(ifap->ifa_flags & IFF_UP))
 			continue;
 
 		if (!(ifap->ifa_flags & IFF_LOOPBACK) && !listen_addr(ifsa))
 			continue;
 
-		switch (hint_sa->sa_family) {
-		case AF_INET:
-			/*
-			 * If the hint address matches this interface
-			 * address/netmask, then we're done.
-			 */
-			if (!bitmaskcmp(&SA2SINADDR(ifsa),
-			    &SA2SINADDR(hint_sa), &SA2SINADDR(ifmasksa),
-			    sizeof(struct in_addr))) {
-				bestif = ifap;
-				goto found;
-			}
-			break;
+		if ((hint_sa->sa_family == AF_INET) &&
+		    ((((struct sockaddr_in*)hint_sa)->sin_addr.s_addr == 
+		      ((struct sockaddr_in*)ifsa)->sin_addr.s_addr))) {
+			const int goodness = 4;
+
+			bestif_goodness = goodness;
+			bestif = ifap;
+			goto found;
+		}
 #ifdef INET6
-		case AF_INET6:
+		if ((hint_sa->sa_family == AF_INET6) &&
+		    (0 == memcmp(&((struct sockaddr_in6*)hint_sa)->sin6_addr,
+				 &((struct sockaddr_in6*)ifsa)->sin6_addr,
+				 sizeof(struct in6_addr))) &&
+		    (((struct sockaddr_in6*)hint_sa)->sin6_scope_id ==
+		    (((struct sockaddr_in6*)ifsa)->sin6_scope_id))) {
+			const int goodness = 4;
+
+			bestif_goodness = goodness;
+			bestif = ifap;
+			goto found;
+		}
+		if (hint_sa->sa_family == AF_INET6) {
 			/*
 			 * For v6 link local addresses, if the caller is on
 			 * a link-local address then use the scope id to see
 			 * which one.
 			 */
 			if (IN6_IS_ADDR_LINKLOCAL(&SA2SIN6ADDR(ifsa)) &&
 			    IN6_IS_ADDR_LINKLOCAL(&SA2SIN6ADDR(caller_sa)) &&
 			    IN6_IS_ADDR_LINKLOCAL(&SA2SIN6ADDR(hint_sa))) {
 				if (SA2SIN6(ifsa)->sin6_scope_id ==
 				    SA2SIN6(caller_sa)->sin6_scope_id) {
-					bestif = ifap;
-					goto found;
+					const int goodness = 3;
+
+					if (bestif_goodness < goodness) {
+						bestif = ifap;
+						bestif_goodness = goodness;
+					}
 				}
-			} else if (!bitmaskcmp(&SA2SIN6ADDR(ifsa),
-			    &SA2SIN6ADDR(hint_sa), &SA2SIN6ADDR(ifmasksa),
-			    sizeof(struct in6_addr))) {
+			}
+		}
+#endif /* INET6 */
+		if (0 == bitmaskcmp(hint_sa, ifsa, ifmasksa)) {
+			const int goodness = 2;
+
+			if (bestif_goodness < goodness) {
 				bestif = ifap;
-				goto found;
+				bestif_goodness = goodness;
 			}
-			break;
-#endif
-		default:
-			continue;
 		}
+		if (!(ifap->ifa_flags & (IFF_LOOPBACK | IFF_POINTOPOINT))) {
+			const int goodness = 1;
 
-		/*
-		 * Remember the first possibly useful interface, preferring
-		 * "normal" to point-to-point and loopback ones.
-		 */
-		if (bestif == NULL ||
-		    (!(ifap->ifa_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) &&
-		    (bestif->ifa_flags & (IFF_LOOPBACK | IFF_POINTOPOINT))))
+			if (bestif_goodness < goodness) {
+				bestif = ifap;
+				bestif_goodness = goodness;
+			}
+		}
+		if (bestif == NULL)
 			bestif = ifap;
 	}
 	if (bestif == NULL)
 		goto freeit;
 
 found:
 	/*
 	 * Construct the new address using the address from
 	 * `bestif', and the port number from `serv_uaddr'.
 	 */
 	serv_nbp = uaddr2taddr(nconf, serv_uaddr);
 	if (serv_nbp == NULL)
 		goto freeit;
 	serv_sa = serv_nbp->buf;
 
 	memcpy(&ss, bestif->ifa_addr, bestif->ifa_addr->sa_len);
 	switch (ss.ss_family) {
 	case AF_INET:
 		SA2SIN(&ss)->sin_port = SA2SIN(serv_sa)->sin_port;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		SA2SIN6(&ss)->sin6_port = SA2SIN6(serv_sa)->sin6_port;
 		break;
 #endif
 	}
 	tbuf.len = ss.ss_len;
 	tbuf.maxlen = sizeof(ss);
 	tbuf.buf = &ss;
 	ret = taddr2uaddr(nconf, &tbuf);
 
 freeit:
 	if (caller_uaddr != NULL)
 		free(caller_uaddr);
 	if (hint_nbp != NULL) {
 		free(hint_nbp->buf);
 		free(hint_nbp);
 	}
 	if (serv_nbp != NULL) {
 		free(serv_nbp->buf);
 		free(serv_nbp);
 	}
 	if (ifp != NULL)
 		freeifaddrs(ifp);
 
 #ifdef ND_DEBUG
 	if (debugging)
 		fprintf(stderr, "addrmerge: returning %s\n", ret);
 #endif
 	return ret;
 }
 
 void
 network_init(void)
 {
 #ifdef INET6
 	struct ifaddrs *ifap, *ifp;
 	struct ipv6_mreq mreq6;
 	unsigned int ifindex;
 	int s;
 #endif
 	int ecode;
 	struct addrinfo hints, *res;
 
 	memset(&hints, 0, sizeof hints);
 	hints.ai_family = AF_INET;
 	if ((ecode = getaddrinfo(NULL, "sunrpc", &hints, &res))) {
 		if (debugging)
 			fprintf(stderr, "can't get local ip4 address: %s\n",
 			    gai_strerror(ecode));
 	} else {
 		local_in4 = (struct sockaddr_in *)malloc(sizeof *local_in4);
 		if (local_in4 == NULL) {
 			if (debugging)
 				fprintf(stderr, "can't alloc local ip4 addr\n");
 		}
 		memcpy(local_in4, res->ai_addr, sizeof *local_in4);
 	}
 
 #ifdef INET6
 	hints.ai_family = AF_INET6;
 	if ((ecode = getaddrinfo(NULL, "sunrpc", &hints, &res))) {
 		if (debugging)
 			fprintf(stderr, "can't get local ip6 address: %s\n",
 			    gai_strerror(ecode));
 	} else {
 		local_in6 = (struct sockaddr_in6 *)malloc(sizeof *local_in6);
 		if (local_in6 == NULL) {
 			if (debugging)
 				fprintf(stderr, "can't alloc local ip6 addr\n");
 		}
 		memcpy(local_in6, res->ai_addr, sizeof *local_in6);
 	}
 
 	/*
 	 * Now join the RPC ipv6 multicast group on all interfaces.
 	 */
 	if (getifaddrs(&ifp) < 0)
 		return;
 
 	mreq6.ipv6mr_interface = 0;
 	inet_pton(AF_INET6, RPCB_MULTICAST_ADDR, &mreq6.ipv6mr_multiaddr);
 
 	s = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
 
 	/*
 	 * Loop through all interfaces. For each IPv6 multicast-capable
 	 * interface, join the RPC multicast group on that interface.
 	 */
 	for (ifap = ifp; ifap != NULL; ifap = ifap->ifa_next) {
 		if (ifap->ifa_addr->sa_family != AF_INET6 ||
 		    !(ifap->ifa_flags & IFF_MULTICAST))
 			continue;
 		ifindex = if_nametoindex(ifap->ifa_name);
 		if (ifindex == mreq6.ipv6mr_interface)
 			/*
 			 * Already did this one.
 			 */
 			continue;
 		mreq6.ipv6mr_interface = ifindex;
 		if (setsockopt(s, IPPROTO_IPV6, IPV6_JOIN_GROUP, &mreq6,
 		    sizeof mreq6) < 0)
 			if (debugging)
 				perror("setsockopt v6 multicast");
 	}
 #endif
 
 	/* close(s); */
 }
 
 struct sockaddr *
 local_sa(int af)
 {
 	switch (af) {
 	case AF_INET:
 		return (struct sockaddr *)local_in4;
 #ifdef INET6
 	case AF_INET6:
 		return (struct sockaddr *)local_in6;
 #endif
 	default:
 		return NULL;
 	}
 }
Index: projects/release-pkg/usr.sbin/services_mkdb/services_mkdb.c
===================================================================
--- projects/release-pkg/usr.sbin/services_mkdb/services_mkdb.c	(revision 293335)
+++ projects/release-pkg/usr.sbin/services_mkdb/services_mkdb.c	(revision 293336)
@@ -1,440 +1,456 @@
 /*	$NetBSD: services_mkdb.c,v 1.14 2008/04/28 20:24:17 martin Exp $	*/
 
 /*-
  * Copyright (c) 1999 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Luke Mewburn and Christos Zoulas.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/stat.h>
 
 #include <assert.h>
 #include <db.h>
 #include <err.h>
 #include <fcntl.h>
 #include <netdb.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <libgen.h>
 #include <libutil.h>
 #include <ctype.h>
 #include <errno.h>
 #include <stringlist.h>
 
 #include "extern.h"
 
 static char tname[MAXPATHLEN];
 
 #define	PMASK		0xffff
 #define PROTOMAX	5
 
 static void	add(DB *, StringList *, size_t, const char *, size_t *, int);
 static StringList ***parseservices(const char *, StringList *);
 static void	cleanup(void);
 static void	store(DB *, DBT *, DBT *, int);
 static void	killproto(DBT *);
 static char    *getstring(const char *, size_t, char **, const char *);
 static size_t	getprotoindex(StringList *, const char *);
 static const char *getprotostr(StringList *, size_t);
 static const char *mkaliases(StringList *, char *, size_t);
 static void	usage(void);
 
 HASHINFO hinfo = {
 	.bsize = 256,
 	.ffactor = 4,
 	.nelem = 32768,
 	.cachesize = 1024,
 	.hash = NULL,
 	.lorder = 0
 };
 
 
 int
 main(int argc, char *argv[])
 {
 	DB	*db;
 	int	 ch;
 	const char *fname = _PATH_SERVICES;
 	const char *dbname = _PATH_SERVICES_DB;
 	int	 warndup = 1;
 	int	 unique = 0;
 	int	 otherflag = 0;
 	int	 byteorder = 0;
 	size_t	 cnt = 0;
 	StringList *sl, ***svc;
 	size_t port, proto;
+	char *dbname_dir;
+	int dbname_dir_fd = -1;
 
 	setprogname(argv[0]);
 
 	while ((ch = getopt(argc, argv, "blo:qu")) != -1)
 		switch (ch) {
 		case 'b':
 		case 'l':
 			if (byteorder != 0)
 				usage();
 			byteorder = ch == 'b' ? 4321 : 1234;
 			break;
 		case 'q':
 			otherflag = 1;
 			warndup = 0;
 			break;
 		case 'o':
 			otherflag = 1;
 			dbname = optarg;
 			break;
 		case 'u':
 			unique++;
 			break;
 		case '?':
 		default:
 			usage();
 		}
 
 	argc -= optind;
 	argv += optind;
 
 	if (argc > 1 || (unique && otherflag))
 		usage();
 	if (argc == 1)
 		fname = argv[0];
 
 	/* Set byte order. */
 	hinfo.lorder = byteorder;
 
 	if (unique)
 		uniq(fname);
 
 	svc = parseservices(fname, sl = sl_init());
 
 	if (atexit(cleanup))
 		err(1, "Cannot install exit handler");
 
 	(void)snprintf(tname, sizeof(tname), "%s.tmp", dbname);
-	db = dbopen(tname, O_RDWR | O_CREAT | O_EXCL,
+	db = dbopen(tname, O_RDWR | O_CREAT | O_EXCL | O_SYNC,
 	    (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH), DB_HASH, &hinfo);
 	if (!db)
 		err(1, "Error opening temporary database `%s'", tname);
 
 
 	for (port = 0; port < PMASK + 1; port++) {
 		if (svc[port] == NULL)
 			continue;
 
 		for (proto = 0; proto < PROTOMAX; proto++) {
 			StringList *s;
 			if ((s = svc[port][proto]) == NULL)
 				continue;
 			add(db, s, port, getprotostr(sl, proto), &cnt, warndup);
 		}
 
 		free(svc[port]);
 	}
 
 	free(svc);
 	sl_free(sl, 1);
 
 	if ((db->close)(db))
 		err(1, "Error closing temporary database `%s'", tname);
 
-	if (rename(tname, dbname) == -1)
+	/*
+	 * Make sure the file is safe on disk.  To improve performance we call
+	 * fsync() on the directory in which the file lies.
+	 */
+	if (rename(tname, dbname) == -1 ||
+	    (dbname_dir = dirname(dbname)) == NULL ||
+	    (dbname_dir_fd = open(dbname_dir, O_RDONLY|O_DIRECTORY)) == -1 ||
+	    fsync(dbname_dir_fd) != 0) {
+		if (dbname_dir_fd != -1)
+			close(dbname_dir_fd);
 		err(1, "Cannot rename `%s' to `%s'", tname, dbname);
+	}
+
+	if (dbname_dir_fd != -1)
+		close(dbname_dir_fd);
 
 	return 0;
 }
 
 static void
 add(DB *db, StringList *sl, size_t port, const char *proto, size_t *cnt,
     int warndup)
 {
 	size_t i;
 	char	 keyb[BUFSIZ], datab[BUFSIZ], abuf[BUFSIZ];
 	DBT	 data, key;
 	key.data = keyb;
 	data.data = datab;
 
 #ifdef DEBUG
 	(void)printf("add %s %zu %s [ ", sl->sl_str[0], port, proto);
 	for (i = 1; i < sl->sl_cur; i++)
 	    (void)printf("%s ", sl->sl_str[i]);
 	(void)printf("]\n");
 #endif
 
 	/* key `indirect key', data `full line' */
 	data.size = snprintf(datab, sizeof(datab), "%zu", (*cnt)++) + 1;
 	key.size = snprintf(keyb, sizeof(keyb), "%s %zu/%s %s",
 	    sl->sl_str[0], port, proto, mkaliases(sl, abuf, sizeof(abuf))) + 1;
 	store(db, &data, &key, warndup);
 
 	/* key `\377port/proto', data = `indirect key' */
 	key.size = snprintf(keyb, sizeof(keyb), "\377%zu/%s",
 	    port, proto) + 1;
 	store(db, &key, &data, warndup);
 
 	/* key `\377port', data = `indirect key' */
 	killproto(&key);
 	store(db, &key, &data, warndup);
 
 	/* add references for service and all aliases */
 	for (i = 0; i < sl->sl_cur; i++) {
 		/* key `\376service/proto', data = `indirect key' */
 		key.size = snprintf(keyb, sizeof(keyb), "\376%s/%s",
 		    sl->sl_str[i], proto) + 1;
 		store(db, &key, &data, warndup);
 
 		/* key `\376service', data = `indirect key' */
 		killproto(&key);
 		store(db, &key, &data, warndup);
 	}
 	sl_free(sl, 1);
 }
 
 static StringList ***
 parseservices(const char *fname, StringList *sl)
 {
 	size_t len, line, pindex;
 	FILE *fp;
 	StringList ***svc, *s;
 	char *p, *ep;
 
 	if ((fp = fopen(fname, "r")) == NULL)
 		err(1, "Cannot open `%s'", fname);
 
 	line = 0;
 	if ((svc = calloc(PMASK + 1, sizeof(StringList **))) == NULL)
 		err(1, "Cannot allocate %zu bytes", (size_t)(PMASK + 1));
 
 	/* XXX: change NULL to "\0\0#" when fparseln fixed */
 	for (; (p = fparseln(fp, &len, &line, NULL, 0)) != NULL; free(p)) {
 		char	*name, *port, *proto, *aliases, *cp, *alias;
 		unsigned long pnum;
 
 		if (len == 0)
 			continue;
 
 		for (cp = p; *cp && isspace((unsigned char)*cp); cp++)
 			continue;
 
 		if (*cp == '\0' || *cp == '#')
 			continue;
 
 		if ((name = getstring(fname, line, &cp, "name")) == NULL)
 			continue;
 
 		if ((port = getstring(fname, line, &cp, "port")) == NULL)
 			continue;
 
 		if (cp) {
 			for (aliases = cp; *cp && *cp != '#'; cp++)
 				continue;
 
 			if (*cp)
 				*cp = '\0';
 		} else
 			aliases = NULL;
 
 		proto = strchr(port, '/');
 		if (proto == NULL || proto[1] == '\0') {
 			warnx("%s, %zu: no protocol found", fname, line);
 			continue;
 		}
 		*proto++ = '\0';
 
 		errno = 0;
 		pnum = strtoul(port, &ep, 0);
 		if (*port == '\0' || *ep != '\0') {
 			warnx("%s, %zu: invalid port `%s'", fname, line, port);
 			continue;
 		}
 		if ((errno == ERANGE && pnum == ULONG_MAX) || pnum > PMASK) {
 			warnx("%s, %zu: port too big `%s'", fname, line, port);
 			continue;
 		}
 
 		if (svc[pnum] == NULL) {
 			svc[pnum] = calloc(PROTOMAX, sizeof(StringList *));
 			if (svc[pnum] == NULL)
 				err(1, "Cannot allocate %zu bytes",
 				    (size_t)PROTOMAX);
 		}
 
 		pindex = getprotoindex(sl, proto);
 		if (svc[pnum][pindex] == NULL)
 			s = svc[pnum][pindex] = sl_init();
 		else
 			s = svc[pnum][pindex];
 
 		/* build list of aliases */
 		if (sl_find(s, name) == NULL) {
 			char *p2;
 
 			if ((p2 = strdup(name)) == NULL)
 				err(1, "Cannot copy string");
 			(void)sl_add(s, p2);
 		}
 
 		if (aliases) {
 			while ((alias = strsep(&aliases, " \t")) != NULL) {
 				if (alias[0] == '\0')
 					continue;
 				if (sl_find(s, alias) == NULL) {
 					char *p2;
 
 					if ((p2 = strdup(alias)) == NULL)
 						err(1, "Cannot copy string");
 					(void)sl_add(s, p2);
 				}
 			}
 		}
 	}
 	(void)fclose(fp);
 	return svc;
 }
 
 /*
  * cleanup(): Remove temporary files upon exit
  */
 static void
 cleanup(void)
 {
 	if (tname[0])
 		(void)unlink(tname);
 }
 
 static char *
 getstring(const char *fname, size_t line, char **cp, const char *tag)
 {
 	char *str;
 
 	while ((str = strsep(cp, " \t")) != NULL && *str == '\0')
 		continue;
 
 	if (str == NULL)
 		warnx("%s, %zu: no %s found", fname, line, tag);
 
 	return str;
 }
 
 static void
 killproto(DBT *key)
 {
 	char *p, *d = key->data;
 
 	if ((p = strchr(d, '/')) == NULL)
 		abort();
 	*p++ = '\0';
 	key->size = p - d;
 }
 
 static void
 store(DB *db, DBT *key, DBT *data, int warndup)
 {
 #ifdef DEBUG
 	int k = key->size - 1;
 	int d = data->size - 1;
 	(void)printf("store [%*.*s] [%*.*s]\n",
 		k, k, (char *)key->data + 1,
 		d, d, (char *)data->data + 1);
 #endif
 	switch ((db->put)(db, key, data, R_NOOVERWRITE)) {
 	case 0:
 		break;
 	case 1:
 		if (warndup)
 			warnx("duplicate service `%s'",
 			    &((char *)key->data)[1]);
 		break;
 	case -1:
 		err(1, "put");
 		break;
 	default:
 		abort();
 		break;
 	}
 }
 
 static size_t
 getprotoindex(StringList *sl, const char *str)
 {
 	size_t i;
 	char *p;
 
 	for (i= 0; i < sl->sl_cur; i++)
 		if (strcmp(sl->sl_str[i], str) == 0)
 			return i;
 
 	if (i == PROTOMAX)
 		errx(1, "Ran out of protocols adding `%s';"
 		    " recompile with larger PROTOMAX", str);
 	if ((p = strdup(str)) == NULL)
 		err(1, "Cannot copy string");
 	(void)sl_add(sl, p);
 	return i;
 }
 
 static const char *
 getprotostr(StringList *sl, size_t i)
 {
 	assert(i < sl->sl_cur);
 	return sl->sl_str[i];
 }
 
 static const char *
 mkaliases(StringList *sl, char *buf, size_t len)
 {
 	size_t nc, i, pos;
 
 	buf[0] = 0;
 	for (i = 1, pos = 0; i < sl->sl_cur; i++) {
 		nc = strlcpy(buf + pos, sl->sl_str[i], len);
 		if (nc >= len)
 			goto out;
 		pos += nc;
 		len -= nc;
 		nc = strlcpy(buf + pos, " ", len);
 		if (nc >= len)
 			goto out;
 		pos += nc;
 		len -= nc;
 	}
 	return buf;
 out:
 	warn("aliases for `%s' truncated", sl->sl_str[0]);
 	return buf;
 }
 
 static void
 usage(void)
 {
 	(void)fprintf(stderr,
 	    "Usage:\t%s [-b | -l] [-q] [-o <db>] [<servicefile>]\n"
 	    "\t%s -u [<servicefile>]\n", getprogname(), getprogname());
 	exit(1);
 }
Index: projects/release-pkg
===================================================================
--- projects/release-pkg	(revision 293335)
+++ projects/release-pkg	(revision 293336)

Property changes on: projects/release-pkg
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r293225-293335