Index: projects/vnet/Makefile.inc1 =================================================================== --- projects/vnet/Makefile.inc1 (revision 302276) +++ projects/vnet/Makefile.inc1 (revision 302277) @@ -1,2559 +1,2561 @@ # # $FreeBSD$ # # Make command line options: # -DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir # -DNO_CLEAN do not clean at all # -DDB_FROM_SRC use the user/group databases in src/etc instead of # the system database when installing. # -DNO_SHARE do not go into share subdir # -DKERNFAST define NO_KERNEL{CONFIG,CLEAN,OBJ} # -DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel # -DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel # -DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel # -DNO_PORTSUPDATE do not update ports in ${MAKE} update # -DNO_ROOT install without using root privilege # -DNO_DOCUPDATE do not update doc in ${MAKE} update # -DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects # LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list # LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list # LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target # LOCAL_MTREE="list of mtree files" to process to allow local directories # to be created before files are installed # LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools # list # METALOG="path to metadata log" to write permission and ownership # when NO_ROOT is set. (default: ${DESTDIR}/METALOG) # TARGET="machine" to crossbuild world for a different machine type # TARGET_ARCH= may be required when a TARGET supports multiple endians # BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL}) # WORLD_FLAGS= additional flags to pass to make(1) during buildworld # KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel # SUBDIR_OVERRIDE="list of dirs" to build rather than everything. # All libraries and includes, and some build tools will still build. # # The intended user-driven targets are: # buildworld - rebuild *everything*, including glue to help do upgrades # installworld- install everything built by "buildworld" # checkworld - run test suite on installed world # doxygen - build API documentation of the kernel # update - convenient way to update your source tree (eg: svn/svnup) # # Standard targets (not defined here) are documented in the makefiles in # /usr/share/mk. These include: # obj depend all install clean cleandepend cleanobj .if !defined(TARGET) || !defined(TARGET_ARCH) .error "Both TARGET and TARGET_ARCH must be defined." .endif SRCDIR?= ${.CURDIR} LOCALBASE?= /usr/local # Cross toolchain changes must be in effect before bsd.compiler.mk # so that gets the right CC, and pass CROSS_TOOLCHAIN to submakes. .if defined(CROSS_TOOLCHAIN) .include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk" CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}" .endif .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif XCOMPILERS= CC CXX CPP .for COMPILER in ${XCOMPILERS} .if defined(CROSS_COMPILER_PREFIX) X${COMPILER}?= ${CROSS_COMPILER_PREFIX}${${COMPILER}} .else X${COMPILER}?= ${${COMPILER}} .endif .endfor # If a full path to an external cross compiler is given, don't build # a cross compiler. .if ${XCC:N${CCACHE_BIN}:M/*} MK_CROSS_COMPILER= no .endif # Pull in COMPILER_TYPE and COMPILER_FREEBSD_VERSION early. .include .include "share/mk/src.opts.mk" # Check if there is a local compiler that can satisfy as an external compiler. .if ${MK_SYSTEM_COMPILER} == "yes" && ${MK_CROSS_COMPILER} == "yes" && \ (${MK_CLANG_BOOTSTRAP} == "yes" || ${MK_GCC_BOOTSTRAP} == "yes") && \ !make(showconfig) # Which compiler is expected to be used? .if ${MK_CLANG_BOOTSTRAP} == "yes" _expected_compiler_type= clang .elif ${MK_GCC_BOOTSTRAP} == "yes" _expected_compiler_type= gcc .endif # If the expected vs CC is different then we can't skip. # GCC cannot be used for cross-arch yet. For clang we pass -target later if # TARGET_ARCH!=MACHINE_ARCH. .if ${_expected_compiler_type} == ${COMPILER_TYPE} && \ (${COMPILER_TYPE} == "clang" || ${TARGET_ARCH} == ${MACHINE_ARCH}) # It needs to be the same revision as we would build for the bootstrap. .if !defined(CROSS_COMPILER_FREEBSD_VERSION) .if ${_expected_compiler_type} == "clang" CROSS_COMPILER_FREEBSD_VERSION!= \ awk '$$2 == "FREEBSD_CC_VERSION" {printf("%d\n", $$3)}' \ ${SRCDIR}/lib/clang/freebsd_cc_version.h || echo unknown CROSS_COMPILER_VERSION!= \ awk '$$2 == "CLANG_VERSION" {split($$3, a, "."); print a[1] * 10000 + a[2] * 100 + a[3]}' \ ${SRCDIR}/lib/clang/include/clang/Basic/Version.inc || echo unknown .elif ${_expected_compiler_type} == "gcc" CROSS_COMPILER_FREEBSD_VERSION!= \ awk '$$2 == "FBSD_CC_VER" {printf("%d\n", $$3)}' \ ${SRCDIR}/gnu/usr.bin/cc/cc_tools/freebsd-native.h || echo unknown CROSS_COMPILER_VERSION!= \ awk -F. '{print $$1 * 10000 + $$2 * 100 + $$3}' \ ${SRCDIR}/contrib/gcc/BASE-VER || echo unknown .endif .export CROSS_COMPILER_FREEBSD_VERSION CROSS_COMPILER_VERSION .endif # !defined(CROSS_COMPILER_FREEBSD_VERSION) .if ${COMPILER_VERSION} == ${CROSS_COMPILER_VERSION} && \ ${COMPILER_FREEBSD_VERSION} == ${CROSS_COMPILER_FREEBSD_VERSION} # Everything matches, disable the bootstrap compiler. MK_CLANG_BOOTSTRAP= no MK_GCC_BOOTSTRAP= no .if make(buildworld) .info SYSTEM_COMPILER: Determined that CC=${CC} matches the source tree. Not bootstrapping a cross-compiler. .endif .endif # ${COMPILER_VERSION} == ${CROSS_COMPILER_VERSION} .endif # ${_expected_compiler_type} == ${COMPILER_TYPE} .endif # ${XCC:N${CCACHE_BIN}:M/*} # For installworld need to ensure that the looked-up compiler metadata is # passed along rather than trying to run cc from the restricted # STRICTTMPPATH. .if ${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no" .if !defined(X_COMPILER_TYPE) CROSSENV+= COMPILER_VERSION=${COMPILER_VERSION} \ COMPILER_TYPE=${COMPILER_TYPE} \ COMPILER_FREEBSD_VERSION=${COMPILER_FREEBSD_VERSION} .else CROSSENV+= COMPILER_VERSION=${X_COMPILER_VERSION} \ COMPILER_TYPE=${X_COMPILER_TYPE} \ COMPILER_FREEBSD_VERSION=${X_COMPILER_FREEBSD_VERSION} .endif .endif # Handle external binutils. .if defined(CROSS_TOOLCHAIN_PREFIX) CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX} .endif # If we do not have a bootstrap binutils (because the in-tree one does not # support the target architecture), provide a default cross-binutils prefix. # This allows aarch64 builds, for example, to automatically use the # aarch64-binutils port or package. .if !make(showconfig) .if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \ !defined(CROSS_BINUTILS_PREFIX) CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_ARCH}-freebsd/bin/ .if !exists(${CROSS_BINUTILS_PREFIX}) .error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX. .endif .endif .endif XBINUTILS= AS AR LD NM OBJCOPY OBJDUMP RANLIB SIZE STRINGS .for BINUTIL in ${XBINUTILS} .if defined(CROSS_BINUTILS_PREFIX) && \ exists(${CROSS_BINUTILS_PREFIX}${${BINUTIL}}) X${BINUTIL}?= ${CROSS_BINUTILS_PREFIX}${${BINUTIL}} .else X${BINUTIL}?= ${${BINUTIL}} .endif .endfor # We must do lib/ and libexec/ before bin/ in case of a mid-install error to # keep the users system reasonably usable. For static->dynamic root upgrades, # we don't want to install a dynamic binary without rtld and the needed # libraries. More commonly, for dynamic root, we don't want to install a # binary that requires a newer library version that hasn't been installed yet. # This ordering is not a guarantee though. The only guarantee of a working # system here would require fine-grained ordering of all components based # on their dependencies. .if !empty(SUBDIR_OVERRIDE) SUBDIR= ${SUBDIR_OVERRIDE} .else SUBDIR= lib libexec .if !defined(NO_ROOT) && (make(installworld) || make(install)) # Ensure libraries are installed before progressing. SUBDIR+=.WAIT .endif SUBDIR+=bin .if ${MK_CDDL} != "no" SUBDIR+=cddl .endif SUBDIR+=gnu include .if ${MK_KERBEROS} != "no" SUBDIR+=kerberos5 .endif .if ${MK_RESCUE} != "no" SUBDIR+=rescue .endif SUBDIR+=sbin .if ${MK_CRYPT} != "no" SUBDIR+=secure .endif .if !defined(NO_SHARE) SUBDIR+=share .endif SUBDIR+=sys usr.bin usr.sbin .if ${MK_TESTS} != "no" SUBDIR+= tests .endif .if ${MK_OFED} != "no" SUBDIR+=contrib/ofed .endif # Local directories are last, since it is nice to at least get the base # system rebuilt before you do them. .for _DIR in ${LOCAL_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .endif .endfor # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR # of a LOCAL_DIRS directory. This allows LOCAL_DIRS=foo and # LOCAL_LIB_DIRS=foo/lib to behave as expected. .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|} _REDUNDENT_LIB_DIRS+= ${LOCAL_LIB_DIRS:M${_DIR}*} .endfor .for _DIR in ${LOCAL_LIB_DIRS} .if empty(_REDUNDENT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .else .warning ${_DIR} not added to SUBDIR list. See UPDATING 20141121. .endif .endfor # We must do etc/ last as it hooks into building the man whatis file # by calling 'makedb' in share/man. This is only relevant for # install/distribute so they build the whatis file after every manpage is # installed. .if make(installworld) || make(install) SUBDIR+=.WAIT .endif SUBDIR+=etc .endif # !empty(SUBDIR_OVERRIDE) .if defined(NOCLEAN) .warning NOCLEAN option is deprecated. Use NO_CLEAN instead. NO_CLEAN= ${NOCLEAN} .endif .if defined(NO_CLEANDIR) CLEANDIR= clean cleandepend .else CLEANDIR= cleandir .endif .if ${MK_META_MODE} == "yes" # If filemon is used then we can rely on the build being incremental-safe. # The .meta files will also track the build command and rebuild should # it change. .if empty(.MAKE.MODE:Mnofilemon) NO_CLEAN= t .endif .endif LOCAL_TOOL_DIRS?= PACKAGEDIR?= ${DESTDIR}/${DISTDIR} .if empty(SHELL:M*csh*) BUILDENV_SHELL?=${SHELL} .else BUILDENV_SHELL?=/bin/sh .endif .if !defined(SVN) || empty(SVN) . for _P in /usr/bin /usr/local/bin . for _S in svn svnlite . if exists(${_P}/${_S}) SVN= ${_P}/${_S} . endif . endfor . endfor .endif SVNFLAGS?= -r HEAD MAKEOBJDIRPREFIX?= /usr/obj .if !defined(OSRELDATE) .if exists(/usr/include/osreldate.h) OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ /usr/include/osreldate.h .else OSRELDATE= 0 .endif .export OSRELDATE .endif # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION. .if !defined(_REVISION) _REVISION!= MK_AUTO_OBJ=no ${MAKE} -C ${SRCDIR}/release -V REVISION .export _REVISION .endif .if !defined(_BRANCH) _BRANCH!= MK_AUTO_OBJ=no ${MAKE} -C ${SRCDIR}/release -V BRANCH .export _BRANCH .endif .if !defined(SRCRELDATE) SRCRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${SRCDIR}/sys/sys/param.h .export SRCRELDATE .endif .if !defined(VERSION) VERSION= FreeBSD ${_REVISION}-${_BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE} .export VERSION .endif .if !defined(PKG_VERSION) .if ${_BRANCH:MSTABLE*} || ${_BRANCH:MCURRENT*} || ${_BRANCH:MALPHA*} TIMENOW= %Y%m%d%H%M%S EXTRA_REVISION= .s${TIMENOW:gmtime} .endif .if ${_BRANCH:M*-p*} EXTRA_REVISION= _${_BRANCH:C/.*-p([0-9]+$)/\1/} .endif PKG_VERSION= ${_REVISION}${EXTRA_REVISION} .endif KNOWN_ARCHES?= aarch64/arm64 \ amd64 \ arm \ armeb/arm \ armv6/arm \ i386 \ i386/pc98 \ mips \ mipsel/mips \ mips64el/mips \ mipsn32el/mips \ mips64/mips \ mipsn32/mips \ powerpc \ powerpc64/powerpc \ riscv64/riscv \ sparc64 .if ${TARGET} == ${TARGET_ARCH} _t= ${TARGET} .else _t= ${TARGET_ARCH}/${TARGET} .endif .for _t in ${_t} .if empty(KNOWN_ARCHES:M${_t}) .error Unknown target ${TARGET_ARCH}:${TARGET}. .endif .endfor .if ${TARGET} == ${MACHINE} TARGET_CPUTYPE?=${CPUTYPE} .else TARGET_CPUTYPE?= .endif .if !empty(TARGET_CPUTYPE) _TARGET_CPUTYPE=${TARGET_CPUTYPE} .else _TARGET_CPUTYPE=dummy .endif _CPUTYPE!= MK_AUTO_OBJ=no MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} \ -f /dev/null -m ${.CURDIR}/share/mk -V CPUTYPE .if ${_CPUTYPE} != ${_TARGET_CPUTYPE} .error CPUTYPE global should be set with ?=. .endif .if make(buildworld) BUILD_ARCH!= uname -p .if ${MACHINE_ARCH} != ${BUILD_ARCH} .error To cross-build, set TARGET_ARCH. .endif .endif .if ${MACHINE} == ${TARGET} && ${MACHINE_ARCH} == ${TARGET_ARCH} && !defined(CROSS_BUILD_TESTING) OBJTREE= ${MAKEOBJDIRPREFIX} .else OBJTREE= ${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH} .endif WORLDTMP= ${OBJTREE}${.CURDIR}/tmp BPATH= ${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin XPATH= ${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin STRICTTMPPATH= ${BPATH}:${XPATH} TMPPATH= ${STRICTTMPPATH}:${PATH} # # Avoid running mktemp(1) unless actually needed. # It may not be functional, e.g., due to new ABI # when in the middle of installing over this system. # .if make(distributeworld) || make(installworld) || make(stageworld) INSTALLTMP!= /usr/bin/mktemp -d -u -t install .endif .if make(stagekernel) || make(distributekernel) TAGS+= kernel PACKAGE= kernel .endif # # Building a world goes through the following stages # # 1. legacy stage [BMAKE] # This stage is responsible for creating compatibility # shims that are needed by the bootstrap-tools, # build-tools and cross-tools stages. These are generally # APIs that tools from one of those three stages need to # build that aren't present on the host. # 1. bootstrap-tools stage [BMAKE] # This stage is responsible for creating programs that # are needed for backward compatibility reasons. They # are not built as cross-tools. # 2. build-tools stage [TMAKE] # This stage is responsible for creating the object # tree and building any tools that are needed during # the build process. Some programs are listed during # this phase because they build binaries to generate # files needed to build these programs. This stage also # builds the 'build-tools' target rather than 'all'. # 3. cross-tools stage [XMAKE] # This stage is responsible for creating any tools that # are needed for building the system. A cross-compiler is one # of them. This differs from build tools in two ways: # 1. the 'all' target is built rather than 'build-tools' # 2. these tools are installed into TMPPATH for stage 4. # 4. world stage [WMAKE] # This stage actually builds the world. # 5. install stage (optional) [IMAKE] # This stage installs a previously built world. # BOOTSTRAPPING?= 0 # Keep these in sync MINIMUM_SUPPORTED_OSREL?= 900044 MINIMUM_SUPPORTED_REL?= 9.1 # Common environment for world related stages CROSSENV+= MAKEOBJDIRPREFIX=${OBJTREE} \ MACHINE_ARCH=${TARGET_ARCH} \ MACHINE=${TARGET} \ CPUTYPE=${TARGET_CPUTYPE} .if ${MK_META_MODE} != "no" # Don't rebuild build-tools targets during normal build. CROSSENV+= BUILD_TOOLS_META=.NOMETA_CMP .endif .if ${MK_GROFF} != "no" CROSSENV+= GROFF_BIN_PATH=${WORLDTMP}/legacy/usr/bin \ GROFF_FONT_PATH=${WORLDTMP}/legacy/usr/share/groff_font \ GROFF_TMAC_PATH=${WORLDTMP}/legacy/usr/share/tmac .endif .if defined(TARGET_CFLAGS) CROSSENV+= ${TARGET_CFLAGS} .endif # bootstrap-tools stage BMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ TOOLS_PREFIX=${WORLDTMP} \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" # need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile BSARGS= DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no \ MK_INCLUDES=yes BMAKE= MAKEOBJDIRPREFIX=${WORLDTMP} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ ${BSARGS} # build-tools stage TMAKE= MAKEOBJDIRPREFIX=${OBJTREE} \ ${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ -DNO_LINT \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_TESTS=no # cross-tools stage XMAKE= TOOLS_PREFIX=${WORLDTMP} ${BMAKE} \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no # kernel-tools stage KTMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${BPATH}:${PATH} \ WORLDTMP=${WORLDTMP} KTMAKE= TOOLS_PREFIX=${WORLDTMP} MAKEOBJDIRPREFIX=${WORLDTMP} \ ${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \ DESTDIR= \ BOOTSTRAPPING=${OSRELDATE} \ SSP_CFLAGS= \ MK_HTML=no -DNO_LINT MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no # world stage WMAKEENV= ${CROSSENV} \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} # make hierarchy HMAKE= PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q} .if defined(NO_ROOT) HMAKE+= PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT .endif CROSSENV+= CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCXXFLAGS} ${XCFLAGS}" \ CPP="${XCPP} ${XCFLAGS}" \ AS="${XAS}" AR="${XAR}" LD="${XLD}" NM=${XNM} \ OBJDUMP=${XOBJDUMP} OBJCOPY="${XOBJCOPY}" \ RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \ SIZE="${XSIZE}" .if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX}) # In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a # directory, but the compiler will look in the right place for its # tools so we don't need to tell it where to look. BFLAGS+= -B${CROSS_BINUTILS_PREFIX} .endif # External compiler needs sysroot and target flags. .if ${MK_CROSS_COMPILER} == "no" || \ (${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no") .if !defined(CROSS_BINUTILS_PREFIX) || !exists(${CROSS_BINUTILS_PREFIX}) BFLAGS+= -B${WORLDTMP}/usr/bin .endif .if ${TARGET} == "arm" .if ${TARGET_ARCH:Marmv6*} != "" && ${TARGET_CPUTYPE:M*soft*} == "" TARGET_ABI= gnueabihf .else TARGET_ABI= gnueabi .endif .endif .if defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc # GCC requires -isystem and -L when using a cross-compiler. --sysroot # won't set header path and -L is used to ensure the base library path # is added before the port PREFIX library path. XCFLAGS+= -isystem ${WORLDTMP}/usr/include -L${WORLDTMP}/usr/lib # Force using libc++ for external GCC. # XXX: This should be checking MK_GNUCXX == no +.if ${X_COMPILER_VERSION} >= 40800 XCXXFLAGS+= -isystem ${WORLDTMP}/usr/include/c++/v1 -std=c++11 \ -nostdinc++ -L${WORLDTMP}/../lib/libc++ +.endif .else TARGET_ABI?= unknown TARGET_TRIPLE?= ${TARGET_ARCH:C/amd64/x86_64/}-${TARGET_ABI}-freebsd11.0 XCFLAGS+= -target ${TARGET_TRIPLE} .endif XCFLAGS+= --sysroot=${WORLDTMP} .endif # ${MK_CROSS_COMPILER} == "no" .if !empty(BFLAGS) XCFLAGS+= ${BFLAGS} .endif .if ${MK_LIB32} != "no" && (${TARGET_ARCH} == "amd64" || \ ${TARGET_ARCH} == "powerpc64") LIBCOMPAT= 32 .include "Makefile.libcompat" .elif ${MK_LIBSOFT} != "no" && ${TARGET_ARCH} == "armv6" LIBCOMPAT= SOFT .include "Makefile.libcompat" .endif WMAKE= ${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 DESTDIR=${WORLDTMP} IMAKEENV= ${CROSSENV} IMAKE= ${IMAKEENV} ${MAKE} -f Makefile.inc1 \ ${IMAKE_INSTALL} ${IMAKE_MTREE} .if empty(.MAKEFLAGS:M-n) IMAKEENV+= PATH=${STRICTTMPPATH}:${INSTALLTMP} \ LD_LIBRARY_PATH=${INSTALLTMP} \ PATH_LOCALE=${INSTALLTMP}/locale IMAKE+= __MAKE_SHELL=${INSTALLTMP}/sh .else IMAKEENV+= PATH=${TMPPATH}:${INSTALLTMP} .endif .if defined(DB_FROM_SRC) INSTALLFLAGS+= -N ${.CURDIR}/etc MTREEFLAGS+= -N ${.CURDIR}/etc .endif _INSTALL_DDIR= ${DESTDIR}/${DISTDIR} INSTALL_DDIR= ${_INSTALL_DDIR:S://:/:g:C:/$::} .if defined(NO_ROOT) METALOG?= ${DESTDIR}/${DISTDIR}/METALOG IMAKE+= -DNO_ROOT METALOG=${METALOG} INSTALLFLAGS+= -U -M ${METALOG} -D ${INSTALL_DDIR} MTREEFLAGS+= -W .endif .if defined(BUILD_PKGS) INSTALLFLAGS+= -h sha256 .endif .if defined(DB_FROM_SRC) || defined(NO_ROOT) IMAKE_INSTALL= INSTALL="install ${INSTALLFLAGS}" IMAKE_MTREE= MTREE_CMD="mtree ${MTREEFLAGS}" .endif # kernel stage KMAKEENV= ${WMAKEENV} KMAKE= ${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME} # # buildworld # # Attempt to rebuild the entire system, with reasonable chance of # success, regardless of how old your existing system is. # _worldtmp: .PHONY .if ${.CURDIR:C/[^,]//g} != "" # The m4 build of sendmail files doesn't like it if ',' is used # anywhere in the path of it's files. @echo @echo "*** Error: path to source tree contains a comma ','" @echo false .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Rebuilding the temporary build tree" @echo "--------------------------------------------------------------" .if !defined(NO_CLEAN) rm -rf ${WORLDTMP} .if defined(LIBCOMPAT) rm -rf ${LIBCOMPATTMP} .endif .else rm -rf ${WORLDTMP}/legacy/usr/include # XXX - These can depend on any header file. rm -f ${OBJTREE}${.CURDIR}/lib/libsysdecode/ioctl.c rm -f ${OBJTREE}${.CURDIR}/usr.bin/kdump/kdump_subr.c .endif .for _dir in \ lib lib/casper usr legacy/bin legacy/usr mkdir -p ${WORLDTMP}/${_dir} .endfor mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .if ${MK_GROFF} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.groff.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null .endif mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/legacy/usr/include >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/usr/include >/dev/null ln -sf ${.CURDIR}/sys ${WORLDTMP} .if ${MK_DEBUG_FILES} != "no" # We could instead disable debug files for these build stages mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/legacy/usr/lib >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/legacy/usr/lib/debug/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" mkdir -p ${WORLDTMP}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mkdir -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .for _mtree in ${LOCAL_MTREE} mtree -deU -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null .endfor _legacy: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.1: legacy release compatibility shims" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} legacy _bootstrap-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.2: bootstrap tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools _cleanobj: .if !defined(NO_CLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} ${CLEANDIR} .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${LIBCOMPATWMAKE} -f Makefile.inc1 ${CLEANDIR} .endif .endif _obj: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} obj _build-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${TMAKE} build-tools _cross-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3: cross tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools ${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools _includes: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.1: building includes" @echo "--------------------------------------------------------------" # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need # headers from default SUBDIR. Do SUBDIR_OVERRIDE includes last. ${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \ MK_INCLUDES=yes includes .if !empty(SUBDIR_OVERRIDE) && make(buildworld) ${_+_}cd ${.CURDIR}; ${WMAKE} MK_INCLUDES=yes SHARED=symlinks includes .endif _libraries: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.2: building libraries" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; \ ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \ MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries everything: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.3: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all WMAKE_TGTS= WMAKE_TGTS+= _worldtmp _legacy .if empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= _bootstrap-tools .endif WMAKE_TGTS+= _cleanobj _obj _build-tools _cross-tools WMAKE_TGTS+= _includes _libraries WMAKE_TGTS+= everything .if defined(LIBCOMPAT) && empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= build${libcompat} .endif buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue .PHONY .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue buildworld_prologue: .PHONY @echo "--------------------------------------------------------------" @echo ">>> World build started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" buildworld_epilogue: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> World build completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" # # We need to have this as a target because the indirection between Makefile # and Makefile.inc1 causes the correct PATH to be used, rather than a # modification of the current environment's PATH. In addition, we need # to quote multiword values. # buildenvvars: .PHONY @echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@} .if ${.TARGETS:Mbuildenv} .if ${.MAKEFLAGS:M-j} .error The buildenv target is incompatible with -j .endif .endif BUILDENV_DIR?= ${.CURDIR} buildenv: .PHONY @echo Entering world for ${TARGET_ARCH}:${TARGET} .if ${BUILDENV_SHELL:M*zsh*} @echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE} .endif @cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} \ || true TOOLCHAIN_TGTS= ${WMAKE_TGTS:Neverything:Nbuild${libcompat}} toolchain: ${TOOLCHAIN_TGTS} .PHONY kernel-toolchain: ${TOOLCHAIN_TGTS:N_includes:N_libraries} .PHONY # # installcheck # # Checks to be sure system is ready for installworld/installkernel. # installcheck: _installcheck_world _installcheck_kernel .PHONY _installcheck_world: .PHONY _installcheck_kernel: .PHONY # # Require DESTDIR to be set if installing for a different architecture or # using the user/group database in the source tree. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \ defined(DB_FROM_SRC) .if !make(distributeworld) _installcheck_world: __installcheck_DESTDIR _installcheck_kernel: __installcheck_DESTDIR __installcheck_DESTDIR: .PHONY .if !defined(DESTDIR) || empty(DESTDIR) @echo "ERROR: Please set DESTDIR!"; \ false .endif .endif .endif .if !defined(DB_FROM_SRC) # # Check for missing UIDs/GIDs. # CHECK_UIDS= auditdistd CHECK_GIDS= audit .if ${MK_SENDMAIL} != "no" CHECK_UIDS+= smmsp CHECK_GIDS+= smmsp .endif .if ${MK_PF} != "no" CHECK_UIDS+= proxy CHECK_GIDS+= proxy authpf .endif .if ${MK_UNBOUND} != "no" CHECK_UIDS+= unbound CHECK_GIDS+= unbound .endif _installcheck_world: __installcheck_UGID __installcheck_UGID: .PHONY .for uid in ${CHECK_UIDS} @if ! `id -u ${uid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .for gid in ${CHECK_GIDS} @if ! `find / -prune -group ${gid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .endif # # Required install tools to be saved in a scratch dir for safety. # .if ${MK_ZONEINFO} != "no" _zoneinfo= zic tzsetup .endif ITOOLS= [ awk cap_mkdb cat chflags chmod chown cmp cp \ date echo egrep find grep id install ${_install-info} \ ln make mkdir mtree mv pwd_mkdb \ rm sed services_mkdb sh strip sysctl test true uname wc ${_zoneinfo} \ ${LOCAL_ITOOLS} # Needed for share/man .if ${MK_MAN} != "no" ITOOLS+=makewhatis .endif # # distributeworld # # Distributes everything compiled by a `buildworld'. # # installworld # # Installs everything compiled by a 'buildworld'. # # Non-base distributions produced by the base system EXTRA_DISTRIBUTIONS= doc .if defined(LIBCOMPAT) EXTRA_DISTRIBUTIONS+= lib${libcompat} .endif .if ${MK_TESTS} != "no" EXTRA_DISTRIBUTIONS+= tests .endif DEBUG_DISTRIBUTIONS= .if ${MK_DEBUG_FILES} != "no" DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,} .endif MTREE_MAGIC?= mtree 2.0 distributeworld installworld stageworld: _installcheck_world .PHONY mkdir -p ${INSTALLTMP} progs=$$(for prog in ${ITOOLS}; do \ if progpath=`which $$prog`; then \ echo $$progpath; \ else \ echo "Required tool $$prog not found in PATH." >&2; \ exit 1; \ fi; \ done); \ libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \ while read line; do \ set -- $$line; \ if [ "$$2 $$3" != "not found" ]; then \ echo $$2; \ else \ echo "Required library $$1 not found." >&2; \ exit 1; \ fi; \ done); \ cp $$libs $$progs ${INSTALLTMP} cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale .if defined(NO_ROOT) -mkdir -p ${METALOG:H} echo "#${MTREE_MAGIC}" > ${METALOG} .endif .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} -mkdir ${DESTDIR}/${DISTDIR}/${dist} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" && ${dist} == "tests" -mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .if defined(NO_ROOT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \ sed -e 's#^\./#./${dist}/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \ sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG} .if defined(LIBCOMPAT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} .endif .endif .endfor -mkdir ${DESTDIR}/${DISTDIR}/base ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \ DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \ LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs .endif ${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \ ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist} | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.meta .endfor .for dist in ${DEBUG_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.debug.meta .endfor .endif .endif packageworld: .PHONY .for dist in base ${EXTRA_DISTRIBUTIONS} .if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug \ @${DESTDIR}/${DISTDIR}/${dist}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug . | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .endif .endfor .for dist in ${DEBUG_DISTRIBUTIONS} . if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvLf - usr/lib/debug | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . endif .endfor # # reinstall # # If you have a build server, you can NFS mount the source and obj directories # and do a 'make reinstall' on the *client* to install new binaries from the # most recent server build. # restage reinstall: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Making hierarchy" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy .if make(restage) @echo "--------------------------------------------------------------" @echo ">>> Making distribution" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} distribution .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Installing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install${libcompat} .endif redistribute: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Distributing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute${libcompat} \ DISTRIBUTION=lib${libcompat} .endif distrib-dirs distribution: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} .if make(distribution) ${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \ METALOG=${METALOG} MK_TESTS=no installconfig .endif # # buildkernel and installkernel # # Which kernels to build and/or install is specified by setting # KERNCONF. If not defined a GENERIC kernel is built/installed. # Only the existing (depending TARGET) config files are used # for building kernels and only the first of these is designated # as the one being installed. # # Note that we have to use TARGET instead of TARGET_ARCH when # we're in kernel-land. Since only TARGET_ARCH is (expected) to # be set to cross-build, we have to make sure TARGET is set # properly. .if defined(KERNFAST) NO_KERNELCLEAN= t NO_KERNELCONFIG= t NO_KERNELOBJ= t # Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah .if !defined(KERNCONF) && ${KERNFAST} != "1" KERNCONF=${KERNFAST} .endif .endif .if ${TARGET_ARCH} == "powerpc64" KERNCONF?= GENERIC64 .else KERNCONF?= GENERIC .endif INSTKERNNAME?= kernel KERNSRCDIR?= ${.CURDIR}/sys KRNLCONFDIR= ${KERNSRCDIR}/${TARGET}/conf KRNLOBJDIR= ${OBJTREE}${KERNSRCDIR} KERNCONFDIR?= ${KRNLCONFDIR} BUILDKERNELS= INSTALLKERNEL= .if defined(NO_INSTALLKERNEL) # All of the BUILDKERNELS loops start at index 1. BUILDKERNELS+= dummy .endif .for _kernel in ${KERNCONF} .if exists(${KERNCONFDIR}/${_kernel}) BUILDKERNELS+= ${_kernel} .if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL) INSTALLKERNEL= ${_kernel} .endif .endif .endfor ${WMAKE_TGTS:N_worldtmp:Nbuild${libcompat}} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY # # buildkernel # # Builds all kernels defined by BUILDKERNELS. # buildkernel: .MAKE .PHONY .if empty(BUILDKERNELS:Ndummy) @echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \ false .endif @echo .for _kernel in ${BUILDKERNELS:Ndummy} @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" @echo "===> ${_kernel}" mkdir -p ${KRNLOBJDIR} .if !defined(NO_KERNELCONFIG) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1: configuring the kernel" @echo "--------------------------------------------------------------" cd ${KRNLCONFDIR}; \ PATH=${TMPPATH} \ config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \ -I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}' .endif .if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR} .endif .if !defined(NO_KERNELOBJ) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj .endif @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ @echo "--------------------------------------------------------------" @echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" .endfor NO_INSTALLEXTRAKERNELS?= yes # # installkernel, etc. # # Install the kernel defined by INSTALLKERNEL # installkernel installkernel.debug \ reinstallkernel reinstallkernel.debug: _installcheck_kernel .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//} .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${_kernel}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//} .endfor .endif distributekernel distributekernel.debug: .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif mkdir -p ${DESTDIR}/${DISTDIR} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta .endif cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \ DESTDIR=${INSTALL_DDIR}/kernel \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.meta .endif .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta .endif cd ${KRNLOBJDIR}/${_kernel}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \ KERNEL=${INSTKERNNAME}.${_kernel} \ DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e "s|^./kernel.${_kernel}|.|" \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta .endif .endfor .endif packagekernel: .PHONY .if defined(NO_ROOT) .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .else .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .endif stagekernel: .PHONY ${_+_}${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} distributekernel PORTSDIR?= /usr/ports WSTAGEDIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/${TARGET}.${TARGET_ARCH}/worldstage KSTAGEDIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/${TARGET}.${TARGET_ARCH}/kernelstage REPODIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/repo PKGSIGNKEY?= # empty .ORDER: stage-packages create-packages .ORDER: create-packages create-world-packages .ORDER: create-packages create-kernel-packages .ORDER: create-packages sign-packages _pkgbootstrap: .PHONY .if !exists(${LOCALBASE}/sbin/pkg) @env ASSUME_ALWAYS_YES=YES pkg bootstrap .endif packages: .PHONY ${_+_}${MAKE} -C ${.CURDIR} PKG_VERSION=${PKG_VERSION} real-packages package-pkg: .PHONY rm -rf /tmp/ports.${TARGET} || : env ${WMAKEENV:Q} SRCDIR=${.CURDIR} PORTSDIR=${PORTSDIR} REVISION=${_REVISION} \ PKG_VERSION=${PKG_VERSION} REPODIR=${REPODIR} WSTAGEDIR=${WSTAGEDIR} \ sh ${.CURDIR}/release/scripts/make-pkg-package.sh real-packages: stage-packages create-packages sign-packages .PHONY stage-packages: .PHONY @mkdir -p ${REPODIR} ${WSTAGEDIR} ${KSTAGEDIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} -DNO_ROOT -B stageworld ; \ ${MAKE} DESTDIR=${KSTAGEDIR} -DNO_ROOT -B stagekernel create-packages: _pkgbootstrap .PHONY @mkdir -p ${REPODIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} create-world-packages ; \ ${MAKE} DESTDIR=${KSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} DISTDIR=kernel \ create-kernel-packages create-world-packages: _pkgbootstrap .PHONY @rm -f ${WSTAGEDIR}/*.plist 2>/dev/null || : @cd ${WSTAGEDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ ${WSTAGEDIR}/METALOG @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ sh ${SRCDIR}/release/packages/generate-ucl.sh -o $${pkgname} \ -s ${SRCDIR} -u ${WSTAGEDIR}/$${pkgname}.ucl ; \ done @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ awk -F\" ' \ /^name/ { printf("===> Creating %s-", $$2); next } \ /^version/ { print $$2; next } \ ' ${WSTAGEDIR}/$${pkgname}.ucl ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${WSTAGEDIR}/$${pkgname}.ucl \ -p ${WSTAGEDIR}/$${pkgname}.plist \ -r ${WSTAGEDIR} \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} ; \ done create-kernel-packages: _pkgbootstrap .PHONY .if exists(${KSTAGEDIR}/kernel.meta) .for flavor in "" -debug @cd ${KSTAGEDIR}/${DISTDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${INSTALLKERNEL} \ ${KSTAGEDIR}/kernel.meta ; \ cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${INSTALLKERNEL:tl}${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ ${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl \ -p ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.plist \ -r ${KSTAGEDIR}/${DISTDIR} \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} .endfor .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if exists(${KSTAGEDIR}/kernel.${_kernel}.meta) .for flavor in "" -debug @cd ${KSTAGEDIR}/kernel.${_kernel} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${_kernel} \ ${KSTAGEDIR}/kernel.${_kernel}.meta ; \ cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${_kernel:tl}${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ ${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl \ -p ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.plist \ -r ${KSTAGEDIR}/kernel.${_kernel} \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} .endfor .endif .endfor .endif sign-packages: _pkgbootstrap .PHONY @[ -L "${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest" ] && \ unlink ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest ; \ pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh repo \ -o ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${PKGSIGNKEY} ; \ ln -s ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(pkg -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest # # # checkworld # # Run test suite on installed world. # checkworld: .PHONY @if [ ! -x ${LOCALBASE}/bin/kyua ]; then \ echo "You need kyua (devel/kyua) to run the test suite." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}${LOCALBASE}/bin/kyua test -k ${TESTSBASE}/Kyuafile # # # doxygen # # Build the API documentation with doxygen # doxygen: .PHONY @if [ ! -x ${LOCALBASE}/bin/doxygen ]; then \ echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all # # update # # Update the source tree(s), by running svn/svnup to update to the # latest copy. # update: .PHONY .if defined(SVN_UPDATE) @echo "--------------------------------------------------------------" @echo ">>> Updating ${.CURDIR} using Subversion" @echo "--------------------------------------------------------------" @(cd ${.CURDIR}; ${SVN} update ${SVNFLAGS}) .endif # # ------------------------------------------------------------------------ # # From here onwards are utility targets used by the 'make world' and # related targets. If your 'world' breaks, you may like to try to fix # the problem and manually run the following targets to attempt to # complete the build. Beware, this is *not* guaranteed to work, you # need to have a pretty good grip on the current state of the system # to attempt to manually finish it. If in doubt, 'make world' again. # # # legacy: Build compatibility shims for the next three targets. This is a # minimal set of tools and shims necessary to compensate for older systems # which don't have the APIs required by the targets built in bootstrap-tools, # build-tools or cross-tools. # # ELF Tool Chain libraries are needed for ELF tools and dtrace tools. # r296685 fix cross-endian objcopy .if ${BOOTSTRAPPING} < 1100102 _elftoolchain_libs= lib/libelf lib/libdwarf .endif legacy: .PHONY .if ${BOOTSTRAPPING} < ${MINIMUM_SUPPORTED_OSREL} && ${BOOTSTRAPPING} != 0 @echo "ERROR: Source upgrades from versions prior to ${MINIMUM_SUPPORTED_REL} are not supported."; \ false .endif .for _tool in tools/build ${_elftoolchain_libs} ${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy includes; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no all; \ ${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no \ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install .endfor # # bootstrap-tools: Build tools needed for compatibility. These are binaries that # are built to build other binaries in the system. However, the focus of these # binaries is usually quite narrow. Bootstrap tools use the host's compiler and # libraries, augmented by -legacy. # _bt= _bootstrap-tools .if ${MK_GAMES} != "no" _strfile= usr.bin/fortune/strfile .endif .if ${MK_GCC} != "no" && ${MK_CXX} != "no" _gperf= gnu/usr.bin/gperf .endif .if ${MK_GROFF} != "no" _groff= gnu/usr.bin/groff \ usr.bin/soelim .endif .if ${MK_VT} != "no" _vtfontcvt= usr.bin/vtfontcvt .endif .if ${BOOTSTRAPPING} < 900002 _sed= usr.bin/sed .endif .if ${BOOTSTRAPPING} < 1000033 _libopenbsd= lib/libopenbsd _m4= usr.bin/m4 _lex= usr.bin/lex ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd ${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4 .endif .if ${BOOTSTRAPPING} < 1000026 _nmtree= lib/libnetbsd \ usr.sbin/nmtree ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd .endif .if ${BOOTSTRAPPING} < 1000027 _cat= bin/cat .endif # r264059 support for status= .if ${BOOTSTRAPPING} < 1100017 _dd= bin/dd .endif # r277259 crunchide: Correct 64-bit section header offset # r281674 crunchide: always include both 32- and 64-bit ELF support .if ${BOOTSTRAPPING} < 1100078 _crunchide= usr.sbin/crunch/crunchide .endif # r285986 crunchen: use STRIPBIN rather than STRIP # 1100113: Support MK_AUTO_OBJ .if ${BOOTSTRAPPING} < 1100078 || \ (${MK_AUTO_OBJ} == "yes" && ${BOOTSTRAPPING} < 1100114) _crunchgen= usr.sbin/crunch/crunchgen .endif .if ${BOOTSTRAPPING} >= 900040 && ${BOOTSTRAPPING} < 900041 _awk= usr.bin/awk .endif # r296926 -P keymap search path, MFC to stable/10 in r298297 .if ${BOOTSTRAPPING} < 1003501 || \ (${BOOTSTRAPPING} >= 1100000 && ${BOOTSTRAPPING} < 1100103) _kbdcontrol= usr.sbin/kbdcontrol .endif _yacc= lib/liby \ usr.bin/yacc ${_bt}-usr.bin/yacc: ${_bt}-lib/liby .if ${MK_BSNMP} != "no" _gensnmptree= usr.sbin/bsnmpd/gensnmptree .endif # We need to build tblgen when we're building clang either as # the bootstrap compiler, or as the part of the normal build. .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no" _clang_tblgen= \ lib/clang/libllvmsupport \ lib/clang/libllvmtablegen \ usr.bin/clang/llvm-tblgen \ usr.bin/clang/clang-tblgen ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport ${_bt}-usr.bin/clang/llvm-tblgen: ${_bt}-lib/clang/libllvmtablegen ${_bt}-lib/clang/libllvmsupport .endif # Default to building the GPL DTC, but build the BSDL one if users explicitly # request it. _dtc= usr.bin/dtc .if ${MK_GPL_DTC} != "no" _dtc= gnu/usr.bin/dtc .endif .if ${MK_KERBEROS} != "no" _kerberos5_bootstrap_tools= \ kerberos5/tools/make-roken \ kerberos5/lib/libroken \ kerberos5/lib/libvers \ kerberos5/tools/asn1_compile \ kerberos5/tools/slc \ usr.bin/compile_et .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g} .endif # r283777 makewhatis(1) replaced with mandoc version which builds a database. .if ${MK_MANDOCDB} != "no" && ${BOOTSTRAPPING} < 1100075 _libopenbsd?= lib/libopenbsd _makewhatis= lib/libsqlite3 \ usr.bin/mandoc ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd ${_bt}-lib/libsqlite3 .endif bootstrap-tools: .PHONY # Please document (add comment) why something is in 'bootstrap-tools'. # Try to bound the building of the bootstrap-tool to just the # FreeBSD versions that need the tool built at this stage of the build. .for _tool in \ ${_clang_tblgen} \ ${_kerberos5_bootstrap_tools} \ ${_strfile} \ ${_gperf} \ ${_groff} \ ${_dtc} \ ${_awk} \ ${_cat} \ ${_dd} \ ${_kbdcontrol} \ usr.bin/lorder \ ${_libopenbsd} \ ${_makewhatis} \ usr.bin/rpcgen \ ${_sed} \ ${_yacc} \ ${_m4} \ ${_lex} \ usr.bin/xinstall \ ${_gensnmptree} \ usr.sbin/config \ ${_crunchide} \ ${_crunchgen} \ ${_nmtree} \ ${_vtfontcvt} \ usr.bin/localedef ${_bt}-${_tool}: .PHONY .MAKE ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install bootstrap-tools: ${_bt}-${_tool} .endfor # # build-tools: Build special purpose build tools # .if !defined(NO_SHARE) _share= share/syscons/scrnmaps .endif .if ${MK_GCC} != "no" _gcc_tools= gnu/usr.bin/cc/cc_tools .endif .if ${MK_RESCUE} != "no" # rescue includes programs that have build-tools targets _rescue=rescue/rescue .endif .for _tool in \ bin/csh \ bin/sh \ ${LOCAL_TOOL_DIRS} \ lib/ncurses/ncurses \ lib/ncurses/ncursesw \ ${_rescue} \ ${_share} \ usr.bin/awk \ lib/libmagic \ usr.bin/mkesdb_static \ usr.bin/mkcsmapper_static \ usr.bin/vi/catalog build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,build-tools)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ build-tools build-tools: build-tools_${_tool} .endfor .for _tool in \ ${_gcc_tools} build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all build-tools: build-tools_${_tool} .endfor # # kernel-tools: Build kernel-building tools # kernel-tools: .PHONY mkdir -p ${MAKEOBJDIRPREFIX}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${MAKEOBJDIRPREFIX}/usr >/dev/null # # cross-tools: All the tools needed to build the rest of the system after # we get done with the earlier stages. It is the last set of tools needed # to begin building the target binaries. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386" _btxld= usr.sbin/btxld .endif .endif # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures # resulting from missing bug fixes or ELF Toolchain updates. .if ${MK_CDDL} != "no" _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \ cddl/usr.bin/ctfmerge .endif # If we're given an XAS, don't build binutils. .if ${XAS:M/*} == "" .if ${MK_BINUTILS_BOOTSTRAP} != "no" _binutils= gnu/usr.bin/binutils .endif .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" _elftctools= lib/libelftc \ lib/libpe \ usr.bin/elfcopy \ usr.bin/nm \ usr.bin/size \ usr.bin/strings # These are not required by the build, but can be useful for developers who # cross-build on a FreeBSD 10 host: _elftctools+= usr.bin/addr2line .endif .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" # If cross-building with an external binutils we still need to build strip for # the target (for at least crunchide). _elftctools= lib/libelftc \ lib/libpe \ usr.bin/elfcopy .endif .if ${MK_CROSS_COMPILER} != "no" .if ${MK_CLANG_BOOTSTRAP} != "no" _clang= usr.bin/clang _clang_libs= lib/clang .endif .if ${MK_GCC_BOOTSTRAP} != "no" _cc= gnu/usr.bin/cc .endif .endif .if ${MK_USB} != "no" _usb_tools= sys/boot/usb/tools .endif cross-tools: .MAKE .PHONY .for _tool in \ ${_clang_libs} \ ${_clang} \ ${_binutils} \ ${_elftctools} \ ${_dtrace_tools} \ ${_cc} \ ${_btxld} \ ${_usb_tools} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${MAKE} DIRPRFX=${_tool}/ obj; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX} install .endfor NXBDESTDIR= ${OBJTREE}/nxb-bin NXBENV= MAKEOBJDIRPREFIX=${OBJTREE}/nxb \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${PATH}:${OBJTREE}/gperf_for_gcc/usr/bin NXBMAKE= ${NXBENV} ${MAKE} \ LLVM_TBLGEN=${NXBDESTDIR}/usr/bin/llvm-tblgen \ CLANG_TBLGEN=${NXBDESTDIR}/usr/bin/clang-tblgen \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_DEBUG_FILES=no # native-xtools is the current target for qemu-user cross builds of ports # via poudriere and the imgact_binmisc kernel module. # For non-clang enabled targets that are still using the in tree gcc # we must build a gperf binary for one instance of its Makefiles. On # clang-enabled systems, the gperf binary is obsolete. native-xtools: .PHONY .if ${MK_GCC_BOOTSTRAP} != "no" mkdir -p ${OBJTREE}/gperf_for_gcc/usr/bin ${_+_}@${ECHODIR} "===> ${_gperf} (obj,all,install)"; \ cd ${.CURDIR}/${_gperf}; \ ${NXBMAKE} DIRPRFX=${_gperf}/ obj; \ ${NXBMAKE} DIRPRFX=${_gperf}/ all; \ ${NXBMAKE} DIRPRFX=${_gperf}/ DESTDIR=${OBJTREE}/gperf_for_gcc install .endif mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${NXBDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${NXBDESTDIR}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${NXBDESTDIR}/usr/lib >/dev/null .endif .for _tool in \ bin/cat \ bin/chmod \ bin/cp \ bin/csh \ bin/echo \ bin/expr \ bin/hostname \ bin/ln \ bin/ls \ bin/mkdir \ bin/mv \ bin/ps \ bin/realpath \ bin/rm \ bin/rmdir \ bin/sh \ bin/sleep \ ${_clang_tblgen} \ usr.bin/ar \ ${_binutils} \ ${_elftctools} \ ${_cc} \ ${_gcc_tools} \ ${_clang_libs} \ ${_clang} \ sbin/md5 \ sbin/sysctl \ gnu/usr.bin/diff \ usr.bin/awk \ usr.bin/basename \ usr.bin/bmake \ usr.bin/bzip2 \ usr.bin/cmp \ usr.bin/dirname \ usr.bin/env \ usr.bin/fetch \ usr.bin/find \ usr.bin/grep \ usr.bin/gzip \ usr.bin/id \ usr.bin/lex \ usr.bin/lorder \ usr.bin/mktemp \ usr.bin/mt \ usr.bin/patch \ usr.bin/sed \ usr.bin/sort \ usr.bin/tar \ usr.bin/touch \ usr.bin/tr \ usr.bin/true \ usr.bin/uniq \ usr.bin/unzip \ usr.bin/xargs \ usr.bin/xinstall \ usr.bin/xz \ usr.bin/yacc \ usr.sbin/chown ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${NXBMAKE} DIRPRFX=${_tool}/ obj; \ ${NXBMAKE} DIRPRFX=${_tool}/ all; \ ${NXBMAKE} DIRPRFX=${_tool}/ DESTDIR=${NXBDESTDIR} install .endfor # # hierarchy - ensure that all the needed directories are present # hierarchy hier: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs # # libraries - build all libraries, and install them under ${DESTDIR}. # # The list of libraries with dependents (${_prebuild_libs}) and their # interdependencies (__L) are built automatically by the # ${.CURDIR}/tools/make_libdeps.sh script. # libraries: .MAKE .PHONY ${_+_}cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 _prereq_libs; \ ${MAKE} -f Makefile.inc1 _startup_libs; \ ${MAKE} -f Makefile.inc1 _prebuild_libs; \ ${MAKE} -f Makefile.inc1 _generic_libs # # static libgcc.a prerequisite for shared libc # _prereq_libs= gnu/lib/libssp/libssp_nonshared gnu/lib/libgcc lib/libcompiler_rt # These dependencies are not automatically generated: # # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before # all shared libraries for ELF. # _startup_libs= gnu/lib/csu _startup_libs+= lib/csu _startup_libs+= gnu/lib/libgcc _startup_libs+= lib/libcompiler_rt _startup_libs+= lib/libc _startup_libs+= lib/libc_nonshared .if ${MK_LIBCPLUSPLUS} != "no" _startup_libs+= lib/libcxxrt .endif gnu/lib/libgcc__L: lib/libc__L gnu/lib/libgcc__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: gnu/lib/libgcc__L .endif _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_kerberos5_lib_libhdb} \ ${_kerberos5_lib_libheimbase} \ ${_kerberos5_lib_libheimntlm} \ ${_libsqlite3} \ ${_kerberos5_lib_libheimipcc} \ ${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \ ${_kerberos5_lib_libroken} \ ${_kerberos5_lib_libwind} \ lib/libbz2 ${_libcom_err} lib/libcrypt \ lib/libelf lib/libexpat \ lib/libfigpar \ ${_lib_libgssapi} \ lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \ ${_lib_casper} \ lib/ncurses/ncurses lib/ncurses/ncursesw \ lib/libopie lib/libpam/libpam ${_lib_libthr} \ ${_lib_libradius} lib/libsbuf lib/libtacplus \ lib/libgeom \ ${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} \ ${_cddl_lib_libctf} \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_lib_libldns} \ ${_secure_lib_libssh} ${_secure_lib_libssl} \ gnu/lib/libdialog .if ${MK_GNUCXX} != "no" _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++ gnu/lib/libstdc++__L: lib/msun__L gnu/lib/libsupc++__L: gnu/lib/libstdc++__L .endif .if ${MK_LIBCPLUSPLUS} != "no" _prebuild_libs+= lib/libc++ .endif lib/libgeom__L: lib/libexpat__L lib/libkvm__L: lib/libelf__L .if ${MK_LIBTHR} != "no" _lib_libthr= lib/libthr .endif .if ${MK_RADIUS_SUPPORT} != "no" _lib_libradius= lib/libradius .endif .if ${MK_OFED} != "no" _ofed_lib= contrib/ofed/usr.lib _prebuild_libs+= contrib/ofed/usr.lib/libosmcomp _prebuild_libs+= contrib/ofed/usr.lib/libopensm _prebuild_libs+= contrib/ofed/usr.lib/libibcommon _prebuild_libs+= contrib/ofed/usr.lib/libibverbs _prebuild_libs+= contrib/ofed/usr.lib/libibumad contrib/ofed/usr.lib/libopensm__L: lib/libthr__L contrib/ofed/usr.lib/libosmcomp__L: lib/libthr__L contrib/ofed/usr.lib/libibumad__L: contrib/ofed/usr.lib/libibcommon__L .endif .if ${MK_CASPER} != "no" _lib_casper= lib/libcasper .endif lib/libpjdlog__L: lib/libutil__L lib/libcasper__L: lib/libnv__L lib/liblzma__L: lib/libthr__L _generic_libs= ${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib ${_ofed_lib} .for _DIR in ${LOCAL_LIB_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) && empty(_generic_libs:M${_DIR}) _generic_libs+= ${_DIR} .endif .endfor lib/libopie__L lib/libtacplus__L: lib/libmd__L .if ${MK_CDDL} != "no" _cddl_lib_libumem= cddl/lib/libumem _cddl_lib_libnvpair= cddl/lib/libnvpair _cddl_lib_libavl= cddl/lib/libavl _cddl_lib_libuutil= cddl/lib/libuutil _cddl_lib_libzfs_core= cddl/lib/libzfs_core _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libzfs__L: lib/libgeom__L cddl/lib/libctf__L: lib/libz__L .endif # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built # on select architectures though (see cddl/lib/Makefile) .if ${MACHINE_CPUARCH} != "sparc64" _prebuild_libs+= lib/libproc lib/librtld_db .endif .if ${MK_CRYPT} != "no" .if ${MK_OPENSSL} != "no" _secure_lib_libcrypto= secure/lib/libcrypto _secure_lib_libssl= secure/lib/libssl lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L .if ${MK_LDNS} != "no" _lib_libldns= lib/libldns lib/libldns__L: secure/lib/libcrypto__L .endif .if ${MK_OPENSSH} != "no" _secure_lib_libssh= secure/lib/libssh secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L .if ${MK_LDNS} != "no" secure/lib/libssh__L: lib/libldns__L .endif .if ${MK_KERBEROS_SUPPORT} != "no" secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libmd__L kerberos5/lib/libroken__L .endif .endif .endif _secure_lib= secure/lib .endif .if ${MK_KERBEROS} != "no" kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \ kerberos5/lib/libwind__L lib/libsqlite3__L kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \ kerberos5/lib/libroken__L kerberos5/lib/libwind__L \ kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L kerberos5/lib/libroken__L: lib/libcrypt__L kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libheimbase__L: lib/libthr__L kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L .endif lib/libsqlite3__L: lib/libthr__L .if ${MK_GSSAPI} != "no" _lib_libgssapi= lib/libgssapi .endif .if ${MK_KERBEROS} != "no" _kerberos5_lib= kerberos5/lib _kerberos5_lib_libasn1= kerberos5/lib/libasn1 _kerberos5_lib_libhdb= kerberos5/lib/libhdb _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5 _kerberos5_lib_libhx509= kerberos5/lib/libhx509 _kerberos5_lib_libroken= kerberos5/lib/libroken _kerberos5_lib_libheimntlm= kerberos5/lib/libheimntlm _libsqlite3= lib/libsqlite3 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc _kerberos5_lib_libwind= kerberos5/lib/libwind _libcom_err= lib/libcom_err .endif .if ${MK_NIS} != "no" _lib_libypclnt= lib/libypclnt .endif .if ${MK_OPENSSL} == "no" lib/libradius__L: lib/libmd__L .endif lib/libproc__L: \ ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L .if ${MK_CXX} != "no" .if ${MK_LIBCPLUSPLUS} != "no" lib/libproc__L: lib/libcxxrt__L .else # This implies MK_GNUCXX != "no"; see lib/libproc lib/libproc__L: gnu/lib/libsupc++__L .endif .endif gnu/lib/libdialog__L: lib/msun__L lib/ncurses/ncursesw__L .for _lib in ${_prereq_libs} ${_lib}__PL: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ install .endif .endfor .for _lib in ${_startup_libs} ${_prebuild_libs} ${_generic_libs} ${_lib}__L: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install .endif .endfor _prereq_libs: ${_prereq_libs:S/$/__PL/} _startup_libs: ${_startup_libs:S/$/__L/} _prebuild_libs: ${_prebuild_libs:S/$/__L/} _generic_libs: ${_generic_libs:S/$/__L/} # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from # 'everything' with _PARALLEL_SUBDIR_OK set. This is because it is unlikely # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in # parallel. This is safe for the world stage of buildworld though since it has # already built libraries in a proper order and installed includes into # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to # avoid trashing a system if it crashes mid-install. .if !make(all) || defined(_PARALLEL_SUBDIR_OK) SUBDIR_PARALLEL= .endif .include .if make(check-old) || make(check-old-dirs) || \ make(check-old-files) || make(check-old-libs) || \ make(delete-old) || make(delete-old-dirs) || \ make(delete-old-files) || make(delete-old-libs) # # check for / delete old files section # .include "ObsoleteFiles.inc" OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \ else you can not start such an application. Consult UPDATING for more \ information regarding how to cope with the removal/revision bump of a \ specific library." .if !defined(BATCH_DELETE_OLD_FILES) RM_I=-i .else RM_I=-v .endif delete-old-files: .PHONY @echo ">>> Removing old files (only deletes safe to delete libs)" # Ask for every old file if the user really wants to remove it. # It's annoying, but better safe than sorry. # NB: We cannot pass the list of OLD_FILES as a parameter because the # argument list will get too long. Using .for/.endfor make "loops" will make # the Makefile parser segfault. @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done # Remove catpages without corresponding manpages. @exec 3<&0; \ find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ rm ${RM_I} $${catpage} <&3; \ fi; \ done @echo ">>> Old files removed" check-old-files: .PHONY @echo ">>> Checking for old files" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done # Check for catpages without corresponding manpages. @find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ echo $${catpage}; \ fi; \ done delete-old-libs: .PHONY @echo ">>> Removing old libraries" @echo "${OLD_LIBS_MESSAGE}" | fmt @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done @echo ">>> Old libraries removed" check-old-libs: .PHONY @echo ">>> Checking for old libraries" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done delete-old-dirs: .PHONY @echo ">>> Removing old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done @echo ">>> Old directories removed" check-old-dirs: .PHONY @echo ">>> Checking for old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir}"; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done delete-old: delete-old-files delete-old-dirs .PHONY @echo "To remove old libraries run '${MAKE} delete-old-libs'." check-old: check-old-files check-old-libs check-old-dirs .PHONY @echo "To remove old files and directories run '${MAKE} delete-old'." @echo "To remove old libraries run '${MAKE} delete-old-libs'." .endif # # showconfig - show build configuration. # showconfig: .PHONY @(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1; \ ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1) 2>&1 | grep ^MK_ | sort -u .if !empty(KRNLOBJDIR) && !empty(KERNCONF) DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/ .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE) .if exists(${KERNCONFDIR}/${KERNCONF}) FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \ '${KERNCONFDIR}/${KERNCONF}' ; echo .endif .endif .endif .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH}) DTBOUTPUTPATH= ${.CURDIR} .endif # # Build 'standalone' Device Tree Blob # builddtb: .PHONY @PATH=${TMPPATH} MACHINE=${TARGET} \ ${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \ "${FDT_DTS_FILE}" ${DTBOUTPUTPATH} ############### # cleanworld # In the following, the first 'rm' in a series will usually remove all # files and directories. If it does not, then there are probably some # files with file flags set, so this unsets them and tries the 'rm' a # second time. There are situations where this target will be cleaning # some directories via more than one method, but that duplication is # needed to correctly handle all the possible situations. Removing all # files without file flags set in the first 'rm' instance saves time, # because 'chflags' will need to operate on fewer files afterwards. # # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be # created by bsd.obj.mk, except that we don't want to .include that file # in this makefile. # BW_CANONICALOBJDIR:=${OBJTREE}${.CURDIR} cleanworld: .PHONY .if exists(${BW_CANONICALOBJDIR}/) -rm -rf ${BW_CANONICALOBJDIR}/* -chflags -R 0 ${BW_CANONICALOBJDIR} rm -rf ${BW_CANONICALOBJDIR}/* .endif .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR} # To be safe in this case, fall back to a 'make cleandir' ${_+_}@cd ${.CURDIR}; ${MAKE} cleandir .endif .if defined(TARGET) && defined(TARGET_ARCH) .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH} XDEV_CPUTYPE?=${CPUTYPE} .else XDEV_CPUTYPE?=${TARGET_CPUTYPE} .endif NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \ MK_MAN=no MK_NLS=no MK_PROFILE=no \ MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ CPUTYPE=${XDEV_CPUTYPE} XDDIR=${TARGET_ARCH}-freebsd XDTP?=/usr/${XDDIR} .if ${XDTP:N/*} .error XDTP variable should be an absolute path .endif CDBENV=MAKEOBJDIRPREFIX=${MAKEOBJDIRPREFIX}/${XDDIR} \ INSTALL="sh ${.CURDIR}/tools/install.sh" CDENV= ${CDBENV} \ TOOLS_PREFIX=${XDTP} CD2CFLAGS=-isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib \ --sysroot=${XDDESTDIR}/ -B${XDDESTDIR}/usr/libexec \ -B${XDDESTDIR}/usr/bin -B${XDDESTDIR}/usr/lib CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CFLAGS}" \ CPP="${CPP} ${CD2CFLAGS}" \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} CDTMP= ${MAKEOBJDIRPREFIX}/${XDDIR}/${.CURDIR}/tmp CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${NOFUN} CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} ${MAKE} ${NOFUN} XDDESTDIR=${DESTDIR}/${XDTP} .if !defined(OSREL) OSREL!= uname -r | sed -e 's/[-(].*//' .endif .ORDER: xdev-build xdev-install xdev-links xdev: xdev-build xdev-install .PHONY .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools .PHONY _xb-worldtmp: .PHONY mkdir -p ${CDTMP}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${CDTMP}/usr >/dev/null _xb-bootstrap-tools: .PHONY .for _tool in \ ${_clang_tblgen} \ ${_gperf} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ all; \ ${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install .endfor _xb-build-tools: .PHONY ${_+_}@cd ${.CURDIR}; \ ${CDBENV} ${MAKE} -f Makefile.inc1 ${NOFUN} build-tools _xb-cross-tools: .PHONY .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ obj; \ ${CDMAKE} DIRPRFX=${_tool}/ all .endfor _xi-mtree: .PHONY ${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}" mkdir -p ${XDDESTDIR} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${XDDESTDIR} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${XDDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${XDDESTDIR}/usr/include >/dev/null .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${XDDESTDIR}/usr >/dev/null .endif .if ${MK_TESTS} != "no" mkdir -p ${XDDESTDIR}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${XDDESTDIR}${TESTSBASE} >/dev/null .endif .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries .PHONY _xi-cross-tools: .PHONY @echo "_xi-cross-tools" .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_cc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR} .endfor _xi-includes: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \ DESTDIR=${XDDESTDIR} _xi-libraries: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \ DESTDIR=${XDDESTDIR} xdev-links: .PHONY ${_+_}cd ${XDDESTDIR}/usr/bin; \ mkdir -p ../../../../usr/bin; \ for i in *; do \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}-$$i; \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}${OSREL}-$$i; \ done .else xdev xdev-build xdev-install xdev-links: .PHONY @echo "*** Error: Both TARGET and TARGET_ARCH must be defined for \"${.TARGET}\" target" .endif Index: projects/vnet/Makefile.libcompat =================================================================== --- projects/vnet/Makefile.libcompat (revision 302276) +++ projects/vnet/Makefile.libcompat (revision 302277) @@ -1,183 +1,183 @@ # $FreeBSD$ .if !targets(__<${_this:T}>__) __<${_this:T}>__: # Makefile for the compatibility libraries. # - 32-bit compat libraries on PowerPC and AMD64. # could also be for mips, but that doesn't work today. # ------------------------------------------------------------------- # 32 bit world .if ${TARGET_ARCH} == "amd64" .if empty(TARGET_CPUTYPE) LIB32CPUFLAGS= -march=i686 -mmmx -msse -msse2 .else LIB32CPUFLAGS= -march=${TARGET_CPUTYPE} .endif LIB32WMAKEENV= MACHINE=i386 MACHINE_ARCH=i386 \ MACHINE_CPU="i686 mmx sse sse2" LIB32WMAKEFLAGS= \ AS="${XAS} --32" \ LD="${XLD} -m elf_i386_fbsd -L${LIBCOMPATTMP}/usr/lib32" \ OBJCOPY="${XOBJCOPY}" .elif ${TARGET_ARCH} == "powerpc64" .if empty(TARGET_CPUTYPE) LIB32CPUFLAGS= -mcpu=powerpc .else LIB32CPUFLAGS= -mcpu=${TARGET_CPUTYPE} .endif LIB32WMAKEENV= MACHINE=powerpc MACHINE_ARCH=powerpc LIB32WMAKEFLAGS= \ LD="${XLD} -m elf32ppc_fbsd" \ OBJCOPY="${XOBJCOPY}" .endif LIB32CFLAGS= -m32 -DCOMPAT_32BIT LIB32DTRACE= ${DTRACE} -32 LIB32WMAKEFLAGS+= -DCOMPAT_32BIT # ------------------------------------------------------------------- # soft-fp world .if ${TARGET_ARCH} == "armv6" LIBSOFTCFLAGS= -DCOMPAT_SOFTFP LIBSOFTCPUFLAGS= -mfloat-abi=softfp LIBSOFTWMAKEENV= CPUTYPE=soft MACHINE=arm MACHINE_ARCH=armv6 LIBSOFTWMAKEFLAGS= -DCOMPAT_SOFTFP .endif # ------------------------------------------------------------------- # Generic code for each type. # Set defaults based on type. libcompat= ${LIBCOMPAT:tl} _LIBCOMPAT_MAKEVARS= _OBJTREE TMP CPUFLAGS CFLAGS CXXFLAGS WMAKEENV \ WMAKEFLAGS WMAKE .for _var in ${_LIBCOMPAT_MAKEVARS} .if !empty(LIB${LIBCOMPAT}${_var}) LIBCOMPAT${_var}?= ${LIB${LIBCOMPAT}${_var}} .endif .endfor # Shared flags LIBCOMPAT_OBJTREE?= ${OBJTREE}${.CURDIR}/world${libcompat} LIBCOMPATTMP?= ${OBJTREE}${.CURDIR}/lib${libcompat} LIBCOMPATCFLAGS+= ${LIBCOMPATCPUFLAGS} \ -L${LIBCOMPATTMP}/usr/lib${libcompat} \ --sysroot=${LIBCOMPATTMP} \ ${BFLAGS} # -B is needed to find /usr/lib32/crti.o for GCC and /usr/libsoft/crti.o for # Clang/GCC. LIBCOMPATCFLAGS+= -B${LIBCOMPATTMP}/usr/lib${libcompat} .if defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc # GCC requires -isystem when using a cross-compiler and --sysroot. Note that # Makefile.inc1 only applies this with an external compiler but libcompat # always does since even in-tree GCC 4.2 needs this to override the built-in # sysroot path which --sysroot does not actually do for headers. LIBCOMPATCFLAGS+= -isystem ${LIBCOMPATTMP}/usr/include # Force using libc++ for external GCC. # XXX: This should be checking MK_GNUCXX == no -.if ${MK_CROSS_COMPILER} == "no" || \ - (${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no") +.if ${X_COMPILER_VERSION} >= 40800 && (${MK_CROSS_COMPILER} == "no" || \ + (${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no")) LIBCOMPATCXXFLAGS+= -isystem ${LIBCOMPATTMP}/usr/include/c++/v1 -std=c++11 \ -nostdinc++ -L${LIBCOMPAT_OBJTREE}${.CURDIR}/lib/libc++ .endif .endif # Yes, the flags are redundant. LIBCOMPATWMAKEENV+= MAKEOBJDIRPREFIX=${LIBCOMPAT_OBJTREE} \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${TMPPATH} \ LIBDIR=/usr/lib${libcompat} \ SHLIBDIR=/usr/lib${libcompat} \ DTRACE="${LIB$COMPATDTRACE:U${DTRACE}}" .if ${MK_META_MODE} != "no" # Don't rebuild build-tools targets during normal build. LIBCOMPATWMAKEENV+= BUILD_TOOLS_META=.NOMETA_CMP .endif LIBCOMPATWMAKEFLAGS+= CC="${XCC} ${LIBCOMPATCFLAGS}" \ CXX="${XCXX} ${LIBCOMPATCXXFLAGS} ${LIBCOMPATCFLAGS}" \ CPP="${XCPP} ${LIBCOMPATCFLAGS}" \ DESTDIR=${LIBCOMPATTMP} \ -DNO_CPU_CFLAGS \ MK_CTF=no \ -DNO_LINT \ MK_TESTS=no LIBCOMPATWMAKE+= ${LIBCOMPATWMAKEENV} ${MAKE} ${LIBCOMPATWMAKEFLAGS} \ MK_MAN=no MK_HTML=no LIBCOMPATIMAKE+= ${LIBCOMPATWMAKE:NINSTALL=*:NDESTDIR=*} \ MK_TOOLCHAIN=no ${IMAKE_INSTALL} \ -DLIBRARIES_ONLY _LC_LIBDIRS.yes= lib gnu/lib _LC_LIBDIRS.${MK_CDDL:tl}+= cddl/lib _LC_LIBDIRS.${MK_CRYPT:tl}+= secure/lib _LC_LIBDIRS.${MK_KERBEROS:tl}+= kerberos5/lib _LC_INCDIRS= \ include \ lib/ncurses/ncursesw \ ${_LC_LIBDIRS.yes} # Shared logic build${libcompat}: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> stage 5.1: building lib${libcompat} shim libraries" @echo "--------------------------------------------------------------" mkdir -p ${LIBCOMPATTMP}/usr/include mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${LIBCOMPATTMP}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${LIBCOMPATTMP}/usr/include >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${LIBCOMPATTMP}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${LIBCOMPATTMP}/usr/lib >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${LIBCOMPATTMP}/usr/lib/debug/usr >/dev/null .endif mkdir -p ${WORLDTMP} ln -sf ${.CURDIR}/sys ${WORLDTMP} .for _t in obj includes .for _dir in ${_LC_INCDIRS} ${_+_}cd ${.CURDIR}/${_dir}; ${LIBCOMPATWMAKE} MK_INCLUDES=yes \ DIRPRFX=${_dir}/ ${_t} .endfor .endfor .for _dir in lib/ncurses/ncurses lib/ncurses/ncursesw lib/libmagic ${_+_}cd ${.CURDIR}/${_dir}; \ WORLDTMP=${WORLDTMP} \ MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}" \ MAKEOBJDIRPREFIX=${LIBCOMPAT_OBJTREE} ${MAKE} SSP_CFLAGS= DESTDIR= \ DIRPRFX=${_dir}/ -DNO_LINT -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ build-tools .endfor ${_+_}cd ${.CURDIR}; \ ${LIBCOMPATWMAKE} -f Makefile.inc1 -DNO_FSCHG libraries .if ${libcompat} == "32" .for _t in obj all ${_+_}cd ${.CURDIR}/libexec/rtld-elf; PROG=ld-elf32.so.1 ${LIBCOMPATWMAKE} \ -DNO_FSCHG DIRPRFX=libexec/rtld-elf/ ${_t} ${_+_}cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIBCOMPATWMAKE} \ DIRPRFX=usr.bin/ldd ${_t} .endfor .endif distribute${libcompat} install${libcompat}: .PHONY .for _dir in ${_LC_LIBDIRS.yes} ${_+_}cd ${.CURDIR}/${_dir}; ${LIBCOMPATIMAKE} ${.TARGET:S/${libcompat}$//} .endfor .if ${libcompat} == "32" ${_+_}cd ${.CURDIR}/libexec/rtld-elf; \ PROG=ld-elf32.so.1 ${LIBCOMPATIMAKE} ${.TARGET:S/32$//} ${_+_}cd ${.CURDIR}/usr.bin/ldd; PROG=ldd32 ${LIBCOMPATIMAKE} \ ${.TARGET:S/32$//} .endif .endif Index: projects/vnet/lib/libc/rpc/Makefile.inc =================================================================== --- projects/vnet/lib/libc/rpc/Makefile.inc (revision 302276) +++ projects/vnet/lib/libc/rpc/Makefile.inc (revision 302277) @@ -1,179 +1,179 @@ # @(#)Makefile 5.11 (Berkeley) 9/6/90 # $FreeBSD$ .PATH: ${LIBC_SRCTOP}/rpc ${LIBC_SRCTOP}/. SRCS+= auth_none.c auth_unix.c authunix_prot.c bindresvport.c clnt_bcast.c \ clnt_dg.c clnt_generic.c clnt_perror.c clnt_raw.c clnt_simple.c \ clnt_vc.c rpc_dtablesize.c getnetconfig.c getnetpath.c getrpcent.c \ getrpcport.c mt_misc.c pmap_clnt.c pmap_getmaps.c pmap_getport.c \ pmap_prot.c pmap_prot2.c pmap_rmt.c rpc_prot.c rpc_commondata.c \ rpc_callmsg.c rpc_generic.c rpc_soc.c rpcb_clnt.c rpcb_prot.c \ rpcb_st_xdr.c rpcsec_gss_stub.c svc.c svc_auth.c svc_dg.c \ svc_auth_unix.c svc_generic.c svc_raw.c svc_run.c svc_simple.c \ svc_vc.c # Secure-RPC SRCS+= auth_time.c auth_des.c authdes_prot.c des_crypt.c des_soft.c \ crypt_client.c key_call.c key_prot_xdr.c getpublickey.c \ svc_auth_des.c # Resolver stuff SRCS+= netname.c netnamer.c rpcdname.c # Misc Source SRCS+= rtime.c # generated sources SRCS+= crypt_clnt.c crypt_xdr.c crypt.h SYM_MAPS+=${LIBC_SRCTOP}/rpc/Symbol.map CFLAGS+= -DBROKEN_DES -DPORTMAP -DDES_BUILTIN CFLAGS+= -I${LIBC_SRCTOP}/rpc CLEANFILES+= crypt_clnt.c crypt_xdr.c crypt.h -RPCDIR= ${DESTDIR}/usr/include/rpcsvc +RPCDIR= ${SRCTOP}/include/rpcsvc RPCGEN= RPCGEN_CPP=${CPP:Q} rpcgen -C crypt_clnt.c: ${RPCDIR}/crypt.x crypt.h ${RPCGEN} -l -o ${.TARGET} ${RPCDIR}/crypt.x crypt_xdr.c: ${RPCDIR}/crypt.x crypt.h ${RPCGEN} -c -o ${.TARGET} ${RPCDIR}/crypt.x crypt.h: ${RPCDIR}/crypt.x ${RPCGEN} -h -o ${.TARGET} ${RPCDIR}/crypt.x MAN+= bindresvport.3 des_crypt.3 getnetconfig.3 getnetpath.3 getrpcent.3 \ getrpcport.3 rpc.3 rpc_soc.3 rpc_clnt_auth.3 rpc_clnt_calls.3 \ rpc_clnt_create.3 rpc_svc_calls.3 rpc_svc_create.3 rpc_svc_err.3 \ rpc_svc_reg.3 rpc_xdr.3 rpcbind.3 publickey.3 rpc_secure.3 \ rtime.3 MAN+= publickey.5 rpc.5 netconfig.5 MLINKS+= bindresvport.3 bindresvport_sa.3 \ des_crypt.3 ecb_crypt.3 \ des_crypt.3 cbc_crypt.3 \ des_crypt.3 des_setparity.3 \ getnetconfig.3 setnetconfig.3 \ getnetconfig.3 getnetconfigent.3 \ getnetconfig.3 freenetconfigent.3 \ getnetconfig.3 endnetconfig.3 \ getnetconfig.3 nc_perror.3 \ getnetconfig.3 nc_sperror.3 \ getnetpath.3 setnetpath.3 \ getnetpath.3 endnetpath.3 \ getrpcent.3 getrpcbyname.3 \ getrpcent.3 getrpcbynumber.3 \ getrpcent.3 endrpcent.3 \ getrpcent.3 setrpcent.3 \ publickey.3 getpublickey.3 \ publickey.3 getsecretkey.3 \ rpc_clnt_auth.3 auth_destroy.3 \ rpc_clnt_auth.3 authnone_create.3 \ rpc_clnt_auth.3 authsys_create.3 \ rpc_clnt_auth.3 authsys_create_default.3 \ rpc_clnt_calls.3 clnt_call.3 \ rpc_clnt_calls.3 clnt_perrno.3 \ rpc_clnt_calls.3 clnt_perror.3 \ rpc_clnt_calls.3 clnt_sperrno.3 \ rpc_clnt_calls.3 clnt_sperror.3 \ rpc_clnt_calls.3 rpc_call.3 \ rpc_clnt_calls.3 rpc_broadcast.3 \ rpc_clnt_calls.3 rpc_broadcast_exp.3 \ rpc_clnt_calls.3 clnt_freeres.3 \ rpc_clnt_calls.3 clnt_geterr.3 \ rpc_clnt_create.3 clnt_control.3 \ rpc_clnt_create.3 clnt_create.3 \ rpc_clnt_create.3 clnt_create_timed.3 \ rpc_clnt_create.3 clnt_create_vers.3 \ rpc_clnt_create.3 clnt_create_vers_timed.3 \ rpc_clnt_create.3 clnt_destroy.3 \ rpc_clnt_create.3 clnt_pcreateerror.3 \ rpc_clnt_create.3 clnt_spcreateerror.3 \ rpc_clnt_create.3 clnt_dg_create.3 \ rpc_clnt_create.3 clnt_raw_create.3 \ rpc_clnt_create.3 clnt_tli_create.3 \ rpc_clnt_create.3 clnt_tp_create.3 \ rpc_clnt_create.3 clnt_tp_create_timed.3 \ rpc_clnt_create.3 clnt_vc_create.3 \ rpc_secure.3 authdes_create.3 \ rpc_secure.3 authdes_getucred.3 \ rpc_secure.3 getnetname.3 \ rpc_secure.3 host2netname.3 \ rpc_secure.3 key_decryptsession.3 \ rpc_secure.3 key_encryptsession.3 \ rpc_secure.3 key_gendes.3 \ rpc_secure.3 key_setsecret.3 \ rpc_secure.3 netname2host.3 \ rpc_secure.3 netname2user.3 \ rpc_secure.3 user2netname.3 \ rpc_svc_calls.3 svc_dg_enablecache.3 \ rpc_svc_calls.3 svc_exit.3 \ rpc_svc_calls.3 svc_freeargs.3 \ rpc_svc_calls.3 svc_getargs.3 \ rpc_svc_calls.3 svc_getreq_common.3 \ rpc_svc_calls.3 svc_getreq_poll.3 \ rpc_svc_calls.3 svc_getreqset.3 \ rpc_svc_calls.3 svc_getrpccaller.3 \ rpc_svc_calls.3 __svc_getcallercreds.3 \ rpc_svc_calls.3 svc_pollset.3 \ rpc_svc_calls.3 svc_run.3 \ rpc_svc_calls.3 svc_sendreply.3 \ rpc_svc_create.3 svc_control.3 \ rpc_svc_create.3 svc_create.3 \ rpc_svc_create.3 svc_dg_create.3 \ rpc_svc_create.3 svc_destroy.3 \ rpc_svc_create.3 svc_fd_create.3 \ rpc_svc_create.3 svc_raw_create.3 \ rpc_svc_create.3 svc_tli_create.3 \ rpc_svc_create.3 svc_tp_create.3 \ rpc_svc_create.3 svc_vc_create.3 \ rpc_svc_err.3 svcerr_auth.3 \ rpc_svc_err.3 svcerr_decode.3 \ rpc_svc_err.3 svcerr_noproc.3 \ rpc_svc_err.3 svcerr_noprog.3 \ rpc_svc_err.3 svcerr_progvers.3 \ rpc_svc_err.3 svcerr_systemerr.3 \ rpc_svc_err.3 svcerr_weakauth.3 \ rpc_svc_reg.3 rpc_reg.3 \ rpc_svc_reg.3 svc_reg.3 \ rpc_svc_reg.3 svc_unreg.3 \ rpc_svc_reg.3 svc_auth_reg.3 \ rpc_svc_reg.3 xprt_register.3 \ rpc_svc_reg.3 xprt_unregister.3 \ rpcbind.3 rpcb_getmaps.3 \ rpcbind.3 rpcb_getaddr.3 \ rpcbind.3 rpcb_gettime.3 \ rpcbind.3 rpcb_rmtcall.3 \ rpcbind.3 rpcb_set.3 \ rpcbind.3 rpcb_unset.3 \ rpc_soc.3 authunix_create.3 \ rpc_soc.3 authunix_create_default.3 \ rpc_soc.3 callrpc.3 \ rpc_soc.3 clnt_broadcast.3 \ rpc_soc.3 clntraw_create.3 \ rpc_soc.3 clnttcp_create.3 \ rpc_soc.3 clntunix_create.3 \ rpc_soc.3 clntudp_bufcreate.3 \ rpc_soc.3 clntudp_create.3 \ rpc_soc.3 get_myaddress.3 \ rpc_soc.3 pmap_getmaps.3 \ rpc_soc.3 pmap_getport.3 \ rpc_soc.3 pmap_rmtcall.3 \ rpc_soc.3 pmap_set.3 \ rpc_soc.3 pmap_unset.3 \ rpc_soc.3 registerrpc.3 \ rpc_soc.3 rpc_createerr.3 \ rpc_soc.3 svc_fds.3 \ rpc_soc.3 svc_fdset.3 \ rpc_soc.3 svc_getcaller.3 \ rpc_soc.3 svc_register.3 \ rpc_soc.3 svc_unregister.3 \ rpc_soc.3 svcfd_create.3 \ rpc_soc.3 svcunixfd_create.3 \ rpc_soc.3 svcraw_create.3 \ rpc_soc.3 svctcp_create.3 \ rpc_soc.3 svcudp_bufcreate.3 \ rpc_soc.3 svcunix_create.3 \ rpc_soc.3 xdr_pmap.3 \ rpc_soc.3 xdr_pmaplist.3 Index: projects/vnet/lib/libc/tests/ssp/Makefile =================================================================== --- projects/vnet/lib/libc/tests/ssp/Makefile (revision 302276) +++ projects/vnet/lib/libc/tests/ssp/Makefile (revision 302277) @@ -1,56 +1,70 @@ # $FreeBSD$ +# XXX This is a workaround to allow i386 to cross-compile on an amd64 host. +.include +# XXX --- + .include NO_WERROR= WARNS?= 2 CFLAGS.h_raw+= -fstack-protector-all -Wstack-protector .if ${COMPILER_TYPE} == "clang" CFLAGS.h_raw+= -fsanitize=bounds .elif ${COMPILER_TYPE} == "gcc" CFLAGS.h_raw+= --param ssp-buffer-size=1 LDADD+= -lssp .endif NETBSD_ATF_TESTS_SH= ssp_test BINDIR= ${TESTSDIR} PROGS= h_fgets PROGS+= h_gets PROGS+= h_getcwd PROGS+= h_memcpy PROGS+= h_memmove PROGS+= h_memset # This testcase doesn't run properly when not compiled with -fsantize=bounds # with clang, which is currently contingent on a compiler_rt update # # XXX: the h_raw/h_read testcases don't cause a SIGABRT with in-tree gcc right # now on amd64 when it trips the stack bounds specified in t_ssp.sh . This # probably needs to be fixed as it's currently hardcoded. # # sanitizer is not tested or supported for ARM right now. sbruno .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" .if ${COMPILER_TYPE} == "clang" && ${MK_TOOLCHAIN} == "yes" .if ${COMPILER_VERSION} < 30500 || 30700 <= ${COMPILER_VERSION} + +# XXX This is a workaround to allow i386 to cross-compile on an amd64 host. +.if ${MACHINE_CPUARCH} == ${_HOST_ARCH} +# XXX --- + PROGS+= h_raw + +# XXX This is a workaround to allow i386 to cross-compile on an amd64 host. +.endif +# XXX --- + .endif .endif .endif PROGS+= h_read PROGS+= h_readlink PROGS+= h_snprintf PROGS+= h_sprintf PROGS+= h_stpcpy PROGS+= h_stpncpy PROGS+= h_strcat PROGS+= h_strcpy PROGS+= h_strncat PROGS+= h_strncpy PROGS+= h_vsnprintf PROGS+= h_vsprintf .include "../Makefile.netbsd-tests" .include Index: projects/vnet/lib/libc/yp/Makefile.inc =================================================================== --- projects/vnet/lib/libc/yp/Makefile.inc (revision 302276) +++ projects/vnet/lib/libc/yp/Makefile.inc (revision 302277) @@ -1,19 +1,19 @@ # from: @(#)Makefile.inc 5.3 (Berkeley) 2/20/91 # $FreeBSD$ # yp sources .PATH: ${LIBC_SRCTOP}/yp SRCS+= xdryp.c yp.h yp_xdr.c yplib.c CLEANFILES+= yp.h yp_xdr.c SYM_MAPS+= ${LIBC_SRCTOP}/yp/Symbol.map -RPCSRC= ${DESTDIR}/usr/include/rpcsvc/yp.x +RPCSRC= ${SRCTOP}/include/rpcsvc/yp.x RPCGEN= RPCGEN_CPP=${CPP:Q} rpcgen -C yp_xdr.c: ${RPCSRC} ${RPCGEN} -c -o ${.TARGET} ${RPCSRC} yp.h: ${RPCSRC} ${RPCGEN} -h -o ${.TARGET} ${RPCSRC} Index: projects/vnet/sbin/natd/natd.c =================================================================== --- projects/vnet/sbin/natd/natd.c (revision 302276) +++ projects/vnet/sbin/natd/natd.c (revision 302277) @@ -1,2043 +1,2043 @@ /* * natd - Network Address Translation Daemon for FreeBSD. * * This software is provided free of charge, with no * warranty of any kind, either expressed or implied. * Use at your own risk. * * You may copy, modify and distribute this software (natd.c) freely. * * Ari Suutari */ #include __FBSDID("$FreeBSD$"); #define SYSLOG_NAMES #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "natd.h" struct instance { const char *name; struct libalias *la; LIST_ENTRY(instance) list; int ifIndex; int assignAliasAddr; char* ifName; int logDropped; u_short inPort; u_short outPort; u_short inOutPort; struct in_addr aliasAddr; int ifMTU; int aliasOverhead; int dropIgnoredIncoming; int divertIn; int divertOut; int divertInOut; }; static LIST_HEAD(, instance) root = LIST_HEAD_INITIALIZER(root); struct libalias *mla; static struct instance *mip; static int ninstance = 1; /* * Default values for input and output * divert socket ports. */ #define DEFAULT_SERVICE "natd" /* * Definition of a port range, and macros to deal with values. * FORMAT: HI 16-bits == first port in range, 0 == all ports. * LO 16-bits == number of ports in range * NOTES: - Port values are not stored in network byte order. */ typedef u_long port_range; #define GETLOPORT(x) ((x) >> 0x10) #define GETNUMPORTS(x) ((x) & 0x0000ffff) #define GETHIPORT(x) (GETLOPORT((x)) + GETNUMPORTS((x))) /* Set y to be the low-port value in port_range variable x. */ #define SETLOPORT(x,y) ((x) = ((x) & 0x0000ffff) | ((y) << 0x10)) /* Set y to be the number of ports in port_range variable x. */ #define SETNUMPORTS(x,y) ((x) = ((x) & 0xffff0000) | (y)) /* * Function prototypes. */ static void DoAliasing (int fd, int direction); static void DaemonMode (void); static void HandleRoutingInfo (int fd); static void Usage (void); static char* FormatPacket (struct ip*); static void PrintPacket (struct ip*); static void SyslogPacket (struct ip*, int priority, const char *label); static int SetAliasAddressFromIfName (const char *ifName); static void InitiateShutdown (int); static void Shutdown (int); static void RefreshAddr (int); static void ParseOption (const char* option, const char* parms); static void ReadConfigFile (const char* fileName); static void SetupPortRedirect (const char* parms); static void SetupProtoRedirect(const char* parms); static void SetupAddressRedirect (const char* parms); static void StrToAddr (const char* str, struct in_addr* addr); static u_short StrToPort (const char* str, const char* proto); static int StrToPortRange (const char* str, const char* proto, port_range *portRange); static int StrToProto (const char* str); static int StrToAddrAndPortRange (char* str, struct in_addr* addr, char* proto, port_range *portRange); static void ParseArgs (int argc, char** argv); static void SetupPunchFW(const char *strValue); static void SetupSkinnyPort(const char *strValue); static void NewInstance(const char *name); static void DoGlobal (int fd); static int CheckIpfwRulenum(unsigned int rnum); /* * Globals. */ static int verbose; static int background; static int running; static int logFacility; static int dynamicMode; static int icmpSock; static int logIpfwDenied; static const char* pidName; static int routeSock; static int globalPort; static int divertGlobal; static int exitDelay; int main (int argc, char** argv) { struct sockaddr_in addr; fd_set readMask; int fdMax; int rval; /* * Initialize packet aliasing software. * Done already here to be able to alter option bits * during command line and configuration file processing. */ NewInstance("default"); /* * Parse options. */ verbose = 0; background = 0; running = 1; dynamicMode = 0; logFacility = LOG_DAEMON; logIpfwDenied = -1; pidName = PIDFILE; routeSock = -1; icmpSock = -1; fdMax = -1; divertGlobal = -1; exitDelay = EXIT_DELAY; ParseArgs (argc, argv); /* * Log ipfw(8) denied packets by default in verbose mode. */ if (logIpfwDenied == -1) logIpfwDenied = verbose; /* * Open syslog channel. */ openlog ("natd", LOG_CONS | LOG_PID | (verbose ? LOG_PERROR : 0), logFacility); LIST_FOREACH(mip, &root, list) { mla = mip->la; /* * If not doing the transparent proxying only, * check that valid aliasing address has been given. */ if (mip->aliasAddr.s_addr == INADDR_NONE && mip->ifName == NULL && !(LibAliasSetMode(mla, 0,0) & PKT_ALIAS_PROXY_ONLY)) errx (1, "instance %s: aliasing address not given", mip->name); if (mip->aliasAddr.s_addr != INADDR_NONE && mip->ifName != NULL) errx (1, "both alias address and interface " "name are not allowed"); /* * Check that valid port number is known. */ if (mip->inPort != 0 || mip->outPort != 0) if (mip->inPort == 0 || mip->outPort == 0) errx (1, "both input and output ports are required"); if (mip->inPort == 0 && mip->outPort == 0 && mip->inOutPort == 0) ParseOption ("port", DEFAULT_SERVICE); /* * Check if ignored packets should be dropped. */ mip->dropIgnoredIncoming = LibAliasSetMode (mla, 0, 0); mip->dropIgnoredIncoming &= PKT_ALIAS_DENY_INCOMING; /* * Create divert sockets. Use only one socket if -p was specified * on command line. Otherwise, create separate sockets for * outgoing and incoming connections. */ if (mip->inOutPort) { mip->divertInOut = socket (PF_INET, SOCK_RAW, IPPROTO_DIVERT); if (mip->divertInOut == -1) Quit ("Unable to create divert socket."); if (mip->divertInOut > fdMax) fdMax = mip->divertInOut; mip->divertIn = -1; mip->divertOut = -1; /* * Bind socket. */ addr.sin_family = AF_INET; addr.sin_addr.s_addr = INADDR_ANY; addr.sin_port = mip->inOutPort; if (bind (mip->divertInOut, (struct sockaddr*) &addr, sizeof addr) == -1) Quit ("Unable to bind divert socket."); } else { mip->divertIn = socket (PF_INET, SOCK_RAW, IPPROTO_DIVERT); if (mip->divertIn == -1) Quit ("Unable to create incoming divert socket."); if (mip->divertIn > fdMax) fdMax = mip->divertIn; mip->divertOut = socket (PF_INET, SOCK_RAW, IPPROTO_DIVERT); if (mip->divertOut == -1) Quit ("Unable to create outgoing divert socket."); if (mip->divertOut > fdMax) fdMax = mip->divertOut; mip->divertInOut = -1; /* * Bind divert sockets. */ addr.sin_family = AF_INET; addr.sin_addr.s_addr = INADDR_ANY; addr.sin_port = mip->inPort; if (bind (mip->divertIn, (struct sockaddr*) &addr, sizeof addr) == -1) Quit ("Unable to bind incoming divert socket."); addr.sin_family = AF_INET; addr.sin_addr.s_addr = INADDR_ANY; addr.sin_port = mip->outPort; if (bind (mip->divertOut, (struct sockaddr*) &addr, sizeof addr) == -1) Quit ("Unable to bind outgoing divert socket."); } /* * Create routing socket if interface name specified and in dynamic mode. */ if (mip->ifName) { if (dynamicMode) { if (routeSock == -1) routeSock = socket (PF_ROUTE, SOCK_RAW, 0); if (routeSock == -1) Quit ("Unable to create routing info socket."); if (routeSock > fdMax) fdMax = routeSock; mip->assignAliasAddr = 1; } else { do { rval = SetAliasAddressFromIfName (mip->ifName); if (background == 0 || dynamicMode == 0) break; if (rval == EAGAIN) sleep(1); } while (rval == EAGAIN); if (rval != 0) exit(1); } } } if (globalPort) { divertGlobal = socket (PF_INET, SOCK_RAW, IPPROTO_DIVERT); if (divertGlobal == -1) Quit ("Unable to create divert socket."); if (divertGlobal > fdMax) fdMax = divertGlobal; /* * Bind socket. */ addr.sin_family = AF_INET; addr.sin_addr.s_addr = INADDR_ANY; addr.sin_port = globalPort; if (bind (divertGlobal, (struct sockaddr*) &addr, sizeof addr) == -1) Quit ("Unable to bind global divert socket."); } /* * Create socket for sending ICMP messages. */ icmpSock = socket (AF_INET, SOCK_RAW, IPPROTO_ICMP); if (icmpSock == -1) Quit ("Unable to create ICMP socket."); /* * And disable reads for the socket, otherwise it slowly fills * up with received icmps which we do not use. */ shutdown(icmpSock, SHUT_RD); /* * Become a daemon unless verbose mode was requested. */ if (!verbose) DaemonMode (); /* * Catch signals to manage shutdown and * refresh of interface address. */ siginterrupt(SIGTERM, 1); siginterrupt(SIGHUP, 1); if (exitDelay) signal(SIGTERM, InitiateShutdown); else signal(SIGTERM, Shutdown); signal (SIGHUP, RefreshAddr); /* * Set alias address if it has been given. */ mip = LIST_FIRST(&root); /* XXX: simon */ LIST_FOREACH(mip, &root, list) { mla = mip->la; if (mip->aliasAddr.s_addr != INADDR_NONE) LibAliasSetAddress (mla, mip->aliasAddr); } while (running) { mip = LIST_FIRST(&root); /* XXX: simon */ if (mip->divertInOut != -1 && !mip->ifName && ninstance == 1) { /* * When using only one socket, just call * DoAliasing repeatedly to process packets. */ DoAliasing (mip->divertInOut, DONT_KNOW); continue; } /* * Build read mask from socket descriptors to select. */ FD_ZERO (&readMask); /* * Check if new packets are available. */ LIST_FOREACH(mip, &root, list) { if (mip->divertIn != -1) FD_SET (mip->divertIn, &readMask); if (mip->divertOut != -1) FD_SET (mip->divertOut, &readMask); if (mip->divertInOut != -1) FD_SET (mip->divertInOut, &readMask); } /* * Routing info is processed always. */ if (routeSock != -1) FD_SET (routeSock, &readMask); if (divertGlobal != -1) FD_SET (divertGlobal, &readMask); if (select (fdMax + 1, &readMask, NULL, NULL, NULL) == -1) { if (errno == EINTR) continue; Quit ("Select failed."); } if (divertGlobal != -1) if (FD_ISSET (divertGlobal, &readMask)) DoGlobal (divertGlobal); LIST_FOREACH(mip, &root, list) { mla = mip->la; if (mip->divertIn != -1) if (FD_ISSET (mip->divertIn, &readMask)) DoAliasing (mip->divertIn, INPUT); if (mip->divertOut != -1) if (FD_ISSET (mip->divertOut, &readMask)) DoAliasing (mip->divertOut, OUTPUT); if (mip->divertInOut != -1) if (FD_ISSET (mip->divertInOut, &readMask)) DoAliasing (mip->divertInOut, DONT_KNOW); } if (routeSock != -1) if (FD_ISSET (routeSock, &readMask)) HandleRoutingInfo (routeSock); } if (background) unlink (pidName); return 0; } static void DaemonMode(void) { FILE* pidFile; daemon (0, 0); background = 1; pidFile = fopen (pidName, "w"); if (pidFile) { fprintf (pidFile, "%d\n", getpid ()); fclose (pidFile); } } static void ParseArgs (int argc, char** argv) { int arg; char* opt; char parmBuf[256]; int len; /* bounds checking */ for (arg = 1; arg < argc; arg++) { opt = argv[arg]; if (*opt != '-') { warnx ("invalid option %s", opt); Usage (); } parmBuf[0] = '\0'; len = 0; while (arg < argc - 1) { if (argv[arg + 1][0] == '-') break; if (len) { strncat (parmBuf, " ", sizeof(parmBuf) - (len + 1)); len += strlen(parmBuf + len); } ++arg; strncat (parmBuf, argv[arg], sizeof(parmBuf) - (len + 1)); len += strlen(parmBuf + len); } ParseOption (opt + 1, (len ? parmBuf : NULL)); } } static void DoGlobal (int fd) { int bytes; int origBytes; char buf[IP_MAXPACKET]; struct sockaddr_in addr; int wrote; socklen_t addrSize; struct ip* ip; char msgBuf[80]; /* * Get packet from socket. */ addrSize = sizeof addr; origBytes = recvfrom (fd, buf, sizeof buf, 0, (struct sockaddr*) &addr, &addrSize); if (origBytes == -1) { if (errno != EINTR) Warn ("read from divert socket failed"); return; } #if 0 if (mip->assignAliasAddr) { if (SetAliasAddressFromIfName (mip->ifName) != 0) exit(1); mip->assignAliasAddr = 0; } #endif /* * This is an IP packet. */ ip = (struct ip*) buf; if (verbose) { /* * Print packet direction and protocol type. */ printf ("Glb "); switch (ip->ip_p) { case IPPROTO_TCP: printf ("[TCP] "); break; case IPPROTO_UDP: printf ("[UDP] "); break; case IPPROTO_ICMP: printf ("[ICMP] "); break; default: printf ("[%d] ", ip->ip_p); break; } /* * Print addresses. */ PrintPacket (ip); } LIST_FOREACH(mip, &root, list) { mla = mip->la; if (LibAliasOutTry (mla, buf, IP_MAXPACKET, 0) != PKT_ALIAS_IGNORED) break; } /* * Length might have changed during aliasing. */ bytes = ntohs (ip->ip_len); /* * Update alias overhead size for outgoing packets. */ if (mip != NULL && bytes - origBytes > mip->aliasOverhead) mip->aliasOverhead = bytes - origBytes; if (verbose) { /* * Print addresses after aliasing. */ printf (" aliased to\n"); printf (" "); PrintPacket (ip); printf ("\n"); } /* * Put packet back for processing. */ wrote = sendto (fd, buf, bytes, 0, (struct sockaddr*) &addr, sizeof addr); if (wrote != bytes) { - if (errno == EMSGSIZE) { + if (errno == EMSGSIZE && mip != NULL) { if (mip->ifMTU != -1) SendNeedFragIcmp (icmpSock, (struct ip*) buf, mip->ifMTU - mip->aliasOverhead); } else if (errno == EACCES && logIpfwDenied) { sprintf (msgBuf, "failed to write packet back"); Warn (msgBuf); } } } static void DoAliasing (int fd, int direction) { int bytes; int origBytes; char buf[IP_MAXPACKET]; struct sockaddr_in addr; int wrote; int status; socklen_t addrSize; struct ip* ip; char msgBuf[80]; int rval; if (mip->assignAliasAddr) { do { rval = SetAliasAddressFromIfName (mip->ifName); if (background == 0 || dynamicMode == 0) break; if (rval == EAGAIN) sleep(1); } while (rval == EAGAIN); if (rval != 0) exit(1); mip->assignAliasAddr = 0; } /* * Get packet from socket. */ addrSize = sizeof addr; origBytes = recvfrom (fd, buf, sizeof buf, 0, (struct sockaddr*) &addr, &addrSize); if (origBytes == -1) { if (errno != EINTR) Warn ("read from divert socket failed"); return; } /* * This is an IP packet. */ ip = (struct ip*) buf; if (direction == DONT_KNOW) { if (addr.sin_addr.s_addr == INADDR_ANY) direction = OUTPUT; else direction = INPUT; } if (verbose) { /* * Print packet direction and protocol type. */ printf (direction == OUTPUT ? "Out " : "In "); if (ninstance > 1) printf ("{%s}", mip->name); switch (ip->ip_p) { case IPPROTO_TCP: printf ("[TCP] "); break; case IPPROTO_UDP: printf ("[UDP] "); break; case IPPROTO_ICMP: printf ("[ICMP] "); break; default: printf ("[%d] ", ip->ip_p); break; } /* * Print addresses. */ PrintPacket (ip); } if (direction == OUTPUT) { /* * Outgoing packets. Do aliasing. */ LibAliasOut (mla, buf, IP_MAXPACKET); } else { /* * Do aliasing. */ status = LibAliasIn (mla, buf, IP_MAXPACKET); if (status == PKT_ALIAS_IGNORED && mip->dropIgnoredIncoming) { if (verbose) printf (" dropped.\n"); if (mip->logDropped) SyslogPacket (ip, LOG_WARNING, "denied"); return; } } /* * Length might have changed during aliasing. */ bytes = ntohs (ip->ip_len); /* * Update alias overhead size for outgoing packets. */ if (direction == OUTPUT && bytes - origBytes > mip->aliasOverhead) mip->aliasOverhead = bytes - origBytes; if (verbose) { /* * Print addresses after aliasing. */ printf (" aliased to\n"); printf (" "); PrintPacket (ip); printf ("\n"); } /* * Put packet back for processing. */ wrote = sendto (fd, buf, bytes, 0, (struct sockaddr*) &addr, sizeof addr); if (wrote != bytes) { if (errno == EMSGSIZE) { if (direction == OUTPUT && mip->ifMTU != -1) SendNeedFragIcmp (icmpSock, (struct ip*) buf, mip->ifMTU - mip->aliasOverhead); } else if (errno == EACCES && logIpfwDenied) { sprintf (msgBuf, "failed to write packet back"); Warn (msgBuf); } } } static void HandleRoutingInfo (int fd) { int bytes; struct if_msghdr ifMsg; /* * Get packet from socket. */ bytes = read (fd, &ifMsg, sizeof ifMsg); if (bytes == -1) { Warn ("read from routing socket failed"); return; } if (ifMsg.ifm_version != RTM_VERSION) { Warn ("unexpected packet read from routing socket"); return; } if (verbose) printf ("Routing message %#x received.\n", ifMsg.ifm_type); if ((ifMsg.ifm_type == RTM_NEWADDR || ifMsg.ifm_type == RTM_IFINFO)) { LIST_FOREACH(mip, &root, list) { mla = mip->la; if (ifMsg.ifm_index == mip->ifIndex) { if (verbose) printf("Interface address/MTU has probably changed.\n"); mip->assignAliasAddr = 1; } } } } static void PrintPacket (struct ip* ip) { printf ("%s", FormatPacket (ip)); } static void SyslogPacket (struct ip* ip, int priority, const char *label) { syslog (priority, "%s %s", label, FormatPacket (ip)); } static char* FormatPacket (struct ip* ip) { static char buf[256]; struct tcphdr* tcphdr; struct udphdr* udphdr; struct icmp* icmphdr; char src[20]; char dst[20]; strcpy (src, inet_ntoa (ip->ip_src)); strcpy (dst, inet_ntoa (ip->ip_dst)); switch (ip->ip_p) { case IPPROTO_TCP: tcphdr = (struct tcphdr*) ((char*) ip + (ip->ip_hl << 2)); sprintf (buf, "[TCP] %s:%d -> %s:%d", src, ntohs (tcphdr->th_sport), dst, ntohs (tcphdr->th_dport)); break; case IPPROTO_UDP: udphdr = (struct udphdr*) ((char*) ip + (ip->ip_hl << 2)); sprintf (buf, "[UDP] %s:%d -> %s:%d", src, ntohs (udphdr->uh_sport), dst, ntohs (udphdr->uh_dport)); break; case IPPROTO_ICMP: icmphdr = (struct icmp*) ((char*) ip + (ip->ip_hl << 2)); sprintf (buf, "[ICMP] %s -> %s %u(%u)", src, dst, icmphdr->icmp_type, icmphdr->icmp_code); break; default: sprintf (buf, "[%d] %s -> %s ", ip->ip_p, src, dst); break; } return buf; } static int SetAliasAddressFromIfName(const char *ifn) { size_t needed; int mib[6]; char *buf, *lim, *next; struct if_msghdr *ifm; struct ifa_msghdr *ifam; struct sockaddr_dl *sdl; struct sockaddr_in *sin; mib[0] = CTL_NET; mib[1] = PF_ROUTE; mib[2] = 0; mib[3] = AF_INET; /* Only IP addresses please */ mib[4] = NET_RT_IFLIST; mib[5] = 0; /* ifIndex??? */ /* * Get interface data. */ if (sysctl(mib, 6, NULL, &needed, NULL, 0) == -1) err(1, "iflist-sysctl-estimate"); if ((buf = malloc(needed)) == NULL) errx(1, "malloc failed"); if (sysctl(mib, 6, buf, &needed, NULL, 0) == -1 && errno != ENOMEM) err(1, "iflist-sysctl-get"); lim = buf + needed; /* * Loop through interfaces until one with * given name is found. This is done to * find correct interface index for routing * message processing. */ mip->ifIndex = 0; next = buf; while (next < lim) { ifm = (struct if_msghdr *)next; next += ifm->ifm_msglen; if (ifm->ifm_version != RTM_VERSION) { if (verbose) warnx("routing message version %d " "not understood", ifm->ifm_version); continue; } if (ifm->ifm_type == RTM_IFINFO) { sdl = (struct sockaddr_dl *)(ifm + 1); if (strlen(ifn) == sdl->sdl_nlen && strncmp(ifn, sdl->sdl_data, sdl->sdl_nlen) == 0) { mip->ifIndex = ifm->ifm_index; mip->ifMTU = ifm->ifm_data.ifi_mtu; break; } } } if (!mip->ifIndex) errx(1, "unknown interface name %s", ifn); /* * Get interface address. */ sin = NULL; while (next < lim) { ifam = (struct ifa_msghdr *)next; next += ifam->ifam_msglen; if (ifam->ifam_version != RTM_VERSION) { if (verbose) warnx("routing message version %d " "not understood", ifam->ifam_version); continue; } if (ifam->ifam_type != RTM_NEWADDR) break; if (ifam->ifam_addrs & RTA_IFA) { int i; char *cp = (char *)(ifam + 1); for (i = 1; i < RTA_IFA; i <<= 1) if (ifam->ifam_addrs & i) cp += SA_SIZE((struct sockaddr *)cp); if (((struct sockaddr *)cp)->sa_family == AF_INET) { sin = (struct sockaddr_in *)cp; break; } } } if (sin == NULL) { warnx("%s: cannot get interface address", ifn); free(buf); return EAGAIN; } LibAliasSetAddress(mla, sin->sin_addr); syslog(LOG_INFO, "Aliasing to %s, mtu %d bytes", inet_ntoa(sin->sin_addr), mip->ifMTU); free(buf); return 0; } void Quit (const char* msg) { Warn (msg); exit (1); } void Warn (const char* msg) { if (background) syslog (LOG_ALERT, "%s (%m)", msg); else warn ("%s", msg); } static void RefreshAddr (int sig __unused) { LibAliasRefreshModules(); if (mip != NULL && mip->ifName != NULL) mip->assignAliasAddr = 1; } static void InitiateShutdown (int sig __unused) { /* * Start timer to allow kernel gracefully * shutdown existing connections when system * is shut down. */ siginterrupt(SIGALRM, 1); signal (SIGALRM, Shutdown); ualarm(exitDelay*1000, 1000); } static void Shutdown (int sig __unused) { running = 0; } /* * Different options recognized by this program. */ enum Option { LibAliasOption, Instance, Verbose, InPort, OutPort, Port, GlobalPort, AliasAddress, TargetAddress, InterfaceName, RedirectPort, RedirectProto, RedirectAddress, ConfigFile, DynamicMode, ProxyRule, LogDenied, LogFacility, PunchFW, SkinnyPort, LogIpfwDenied, PidFile, ExitDelay }; enum Param { YesNo, Numeric, String, None, Address, Service }; /* * Option information structure (used by ParseOption). */ struct OptionInfo { enum Option type; int packetAliasOpt; enum Param parm; const char* parmDescription; const char* description; const char* name; const char* shortName; }; /* * Table of known options. */ static struct OptionInfo optionTable[] = { { LibAliasOption, PKT_ALIAS_UNREGISTERED_ONLY, YesNo, "[yes|no]", "alias only unregistered addresses", "unregistered_only", "u" }, { LibAliasOption, PKT_ALIAS_LOG, YesNo, "[yes|no]", "enable logging", "log", "l" }, { LibAliasOption, PKT_ALIAS_PROXY_ONLY, YesNo, "[yes|no]", "proxy only", "proxy_only", NULL }, { LibAliasOption, PKT_ALIAS_REVERSE, YesNo, "[yes|no]", "operate in reverse mode", "reverse", NULL }, { LibAliasOption, PKT_ALIAS_DENY_INCOMING, YesNo, "[yes|no]", "allow incoming connections", "deny_incoming", "d" }, { LibAliasOption, PKT_ALIAS_USE_SOCKETS, YesNo, "[yes|no]", "use sockets to inhibit port conflict", "use_sockets", "s" }, { LibAliasOption, PKT_ALIAS_SAME_PORTS, YesNo, "[yes|no]", "try to keep original port numbers for connections", "same_ports", "m" }, { Verbose, 0, YesNo, "[yes|no]", "verbose mode, dump packet information", "verbose", "v" }, { DynamicMode, 0, YesNo, "[yes|no]", "dynamic mode, automatically detect interface address changes", "dynamic", NULL }, { InPort, 0, Service, "number|service_name", "set port for incoming packets", "in_port", "i" }, { OutPort, 0, Service, "number|service_name", "set port for outgoing packets", "out_port", "o" }, { Port, 0, Service, "number|service_name", "set port (defaults to natd/divert)", "port", "p" }, { GlobalPort, 0, Service, "number|service_name", "set globalport", "globalport", NULL }, { AliasAddress, 0, Address, "x.x.x.x", "address to use for aliasing", "alias_address", "a" }, { TargetAddress, 0, Address, "x.x.x.x", "address to use for incoming sessions", "target_address", "t" }, { InterfaceName, 0, String, "network_if_name", "take aliasing address from interface", "interface", "n" }, { ProxyRule, 0, String, "[type encode_ip_hdr|encode_tcp_stream] port xxxx server " "a.b.c.d:yyyy", "add transparent proxying / destination NAT", "proxy_rule", NULL }, { RedirectPort, 0, String, "tcp|udp local_addr:local_port_range[,...] [public_addr:]public_port_range" " [remote_addr[:remote_port_range]]", "redirect a port (or ports) for incoming traffic", "redirect_port", NULL }, { RedirectProto, 0, String, "proto local_addr [public_addr] [remote_addr]", "redirect packets of a given proto", "redirect_proto", NULL }, { RedirectAddress, 0, String, "local_addr[,...] public_addr", "define mapping between local and public addresses", "redirect_address", NULL }, { ConfigFile, 0, String, "file_name", "read options from configuration file", "config", "f" }, { LogDenied, 0, YesNo, "[yes|no]", "enable logging of denied incoming packets", "log_denied", NULL }, { LogFacility, 0, String, "facility", "name of syslog facility to use for logging", "log_facility", NULL }, { PunchFW, 0, String, "basenumber:count", "punch holes in the firewall for incoming FTP/IRC DCC connections", "punch_fw", NULL }, { SkinnyPort, 0, String, "port", "set the TCP port for use with the Skinny Station protocol", "skinny_port", NULL }, { LogIpfwDenied, 0, YesNo, "[yes|no]", "log packets converted by natd, but denied by ipfw", "log_ipfw_denied", NULL }, { PidFile, 0, String, "file_name", "store PID in an alternate file", "pid_file", "P" }, { Instance, 0, String, "instance name", "name of aliasing engine instance", "instance", NULL }, { ExitDelay, 0, Numeric, "ms", "delay in ms before daemon exit after signal", "exit_delay", NULL }, }; static void ParseOption (const char* option, const char* parms) { int i; struct OptionInfo* info; int yesNoValue; int aliasValue; int numValue; u_short uNumValue; const char* strValue; struct in_addr addrValue; int max; char* end; const CODE* fac_record = NULL; /* * Find option from table. */ max = sizeof (optionTable) / sizeof (struct OptionInfo); for (i = 0, info = optionTable; i < max; i++, info++) { if (!strcmp (info->name, option)) break; if (info->shortName) if (!strcmp (info->shortName, option)) break; } if (i >= max) { warnx ("unknown option %s", option); Usage (); } uNumValue = 0; yesNoValue = 0; numValue = 0; strValue = NULL; /* * Check parameters. */ switch (info->parm) { case YesNo: if (!parms) parms = "yes"; if (!strcmp (parms, "yes")) yesNoValue = 1; else if (!strcmp (parms, "no")) yesNoValue = 0; else errx (1, "%s needs yes/no parameter", option); break; case Service: if (!parms) errx (1, "%s needs service name or " "port number parameter", option); uNumValue = StrToPort (parms, "divert"); break; case Numeric: if (parms) numValue = strtol (parms, &end, 10); else end = NULL; if (end == parms) errx (1, "%s needs numeric parameter", option); break; case String: strValue = parms; if (!strValue) errx (1, "%s needs parameter", option); break; case None: if (parms) errx (1, "%s does not take parameters", option); break; case Address: if (!parms) errx (1, "%s needs address/host parameter", option); StrToAddr (parms, &addrValue); break; } switch (info->type) { case LibAliasOption: aliasValue = yesNoValue ? info->packetAliasOpt : 0; LibAliasSetMode (mla, aliasValue, info->packetAliasOpt); break; case Verbose: verbose = yesNoValue; break; case DynamicMode: dynamicMode = yesNoValue; break; case InPort: mip->inPort = uNumValue; break; case OutPort: mip->outPort = uNumValue; break; case Port: mip->inOutPort = uNumValue; break; case GlobalPort: globalPort = uNumValue; break; case AliasAddress: memcpy (&mip->aliasAddr, &addrValue, sizeof (struct in_addr)); break; case TargetAddress: LibAliasSetTarget(mla, addrValue); break; case RedirectPort: SetupPortRedirect (strValue); break; case RedirectProto: SetupProtoRedirect(strValue); break; case RedirectAddress: SetupAddressRedirect (strValue); break; case ProxyRule: LibAliasProxyRule (mla, strValue); break; case InterfaceName: if (mip->ifName) free (mip->ifName); mip->ifName = strdup (strValue); break; case ConfigFile: ReadConfigFile (strValue); break; case LogDenied: mip->logDropped = yesNoValue; break; case LogFacility: fac_record = facilitynames; while (fac_record->c_name != NULL) { if (!strcmp (fac_record->c_name, strValue)) { logFacility = fac_record->c_val; break; } else fac_record++; } if(fac_record->c_name == NULL) errx(1, "Unknown log facility name: %s", strValue); break; case PunchFW: SetupPunchFW(strValue); break; case SkinnyPort: SetupSkinnyPort(strValue); break; case LogIpfwDenied: logIpfwDenied = yesNoValue; break; case PidFile: pidName = strdup (strValue); break; case Instance: NewInstance(strValue); break; case ExitDelay: if (numValue < 0 || numValue > MAX_EXIT_DELAY) errx(1, "Incorrect exit delay: %d", numValue); exitDelay = numValue; break; } } void ReadConfigFile (const char* fileName) { FILE* file; char *buf; size_t len; char *ptr, *p; char* option; file = fopen (fileName, "r"); if (!file) err(1, "cannot open config file %s", fileName); while ((buf = fgetln(file, &len)) != NULL) { if (buf[len - 1] == '\n') buf[len - 1] = '\0'; else errx(1, "config file format error: " "last line should end with newline"); /* * Check for comments, strip off trailing spaces. */ if ((ptr = strchr(buf, '#'))) *ptr = '\0'; for (ptr = buf; isspace(*ptr); ++ptr) continue; if (*ptr == '\0') continue; for (p = strchr(buf, '\0'); isspace(*--p);) continue; *++p = '\0'; /* * Extract option name. */ option = ptr; while (*ptr && !isspace (*ptr)) ++ptr; if (*ptr != '\0') { *ptr = '\0'; ++ptr; } /* * Skip white space between name and parms. */ while (*ptr && isspace (*ptr)) ++ptr; ParseOption (option, *ptr ? ptr : NULL); } fclose (file); } static void Usage(void) { int i; int max; struct OptionInfo* info; fprintf (stderr, "Recognized options:\n\n"); max = sizeof (optionTable) / sizeof (struct OptionInfo); for (i = 0, info = optionTable; i < max; i++, info++) { fprintf (stderr, "-%-20s %s\n", info->name, info->parmDescription); if (info->shortName) fprintf (stderr, "-%-20s %s\n", info->shortName, info->parmDescription); fprintf (stderr, " %s\n\n", info->description); } exit (1); } void SetupPortRedirect (const char* parms) { char *buf; char* ptr; char* serverPool; struct in_addr localAddr; struct in_addr publicAddr; struct in_addr remoteAddr; port_range portRange; u_short localPort = 0; u_short publicPort = 0; u_short remotePort = 0; u_short numLocalPorts = 0; u_short numPublicPorts = 0; u_short numRemotePorts = 0; int proto; char* protoName; char* separator; int i; struct alias_link *aliaslink = NULL; buf = strdup (parms); if (!buf) errx (1, "redirect_port: strdup() failed"); /* * Extract protocol. */ protoName = strtok (buf, " \t"); if (!protoName) errx (1, "redirect_port: missing protocol"); proto = StrToProto (protoName); /* * Extract local address. */ ptr = strtok (NULL, " \t"); if (!ptr) errx (1, "redirect_port: missing local address"); separator = strchr(ptr, ','); if (separator) { /* LSNAT redirection syntax. */ localAddr.s_addr = INADDR_NONE; localPort = ~0; numLocalPorts = 1; serverPool = ptr; } else { if ( StrToAddrAndPortRange (ptr, &localAddr, protoName, &portRange) != 0 ) errx (1, "redirect_port: invalid local port range"); localPort = GETLOPORT(portRange); numLocalPorts = GETNUMPORTS(portRange); serverPool = NULL; } /* * Extract public port and optionally address. */ ptr = strtok (NULL, " \t"); if (!ptr) errx (1, "redirect_port: missing public port"); separator = strchr (ptr, ':'); if (separator) { if (StrToAddrAndPortRange (ptr, &publicAddr, protoName, &portRange) != 0 ) errx (1, "redirect_port: invalid public port range"); } else { publicAddr.s_addr = INADDR_ANY; if (StrToPortRange (ptr, protoName, &portRange) != 0) errx (1, "redirect_port: invalid public port range"); } publicPort = GETLOPORT(portRange); numPublicPorts = GETNUMPORTS(portRange); /* * Extract remote address and optionally port. */ ptr = strtok (NULL, " \t"); if (ptr) { separator = strchr (ptr, ':'); if (separator) { if (StrToAddrAndPortRange (ptr, &remoteAddr, protoName, &portRange) != 0) errx (1, "redirect_port: invalid remote port range"); } else { SETLOPORT(portRange, 0); SETNUMPORTS(portRange, 1); StrToAddr (ptr, &remoteAddr); } } else { SETLOPORT(portRange, 0); SETNUMPORTS(portRange, 1); remoteAddr.s_addr = INADDR_ANY; } remotePort = GETLOPORT(portRange); numRemotePorts = GETNUMPORTS(portRange); /* * Make sure port ranges match up, then add the redirect ports. */ if (numLocalPorts != numPublicPorts) errx (1, "redirect_port: port ranges must be equal in size"); /* Remote port range is allowed to be '0' which means all ports. */ if (numRemotePorts != numLocalPorts && (numRemotePorts != 1 || remotePort != 0)) errx (1, "redirect_port: remote port must be 0 or equal to local port range in size"); for (i = 0 ; i < numPublicPorts ; ++i) { /* If remotePort is all ports, set it to 0. */ u_short remotePortCopy = remotePort + i; if (numRemotePorts == 1 && remotePort == 0) remotePortCopy = 0; aliaslink = LibAliasRedirectPort (mla, localAddr, htons(localPort + i), remoteAddr, htons(remotePortCopy), publicAddr, htons(publicPort + i), proto); } /* * Setup LSNAT server pool. */ if (serverPool != NULL && aliaslink != NULL) { ptr = strtok(serverPool, ","); while (ptr != NULL) { if (StrToAddrAndPortRange(ptr, &localAddr, protoName, &portRange) != 0) errx(1, "redirect_port: invalid local port range"); localPort = GETLOPORT(portRange); if (GETNUMPORTS(portRange) != 1) errx(1, "redirect_port: local port must be single in this context"); LibAliasAddServer(mla, aliaslink, localAddr, htons(localPort)); ptr = strtok(NULL, ","); } } free (buf); } void SetupProtoRedirect(const char* parms) { char *buf; char* ptr; struct in_addr localAddr; struct in_addr publicAddr; struct in_addr remoteAddr; int proto; char* protoName; struct protoent *protoent; buf = strdup (parms); if (!buf) errx (1, "redirect_port: strdup() failed"); /* * Extract protocol. */ protoName = strtok(buf, " \t"); if (!protoName) errx(1, "redirect_proto: missing protocol"); protoent = getprotobyname(protoName); if (protoent == NULL) errx(1, "redirect_proto: unknown protocol %s", protoName); else proto = protoent->p_proto; /* * Extract local address. */ ptr = strtok(NULL, " \t"); if (!ptr) errx(1, "redirect_proto: missing local address"); else StrToAddr(ptr, &localAddr); /* * Extract optional public address. */ ptr = strtok(NULL, " \t"); if (ptr) StrToAddr(ptr, &publicAddr); else publicAddr.s_addr = INADDR_ANY; /* * Extract optional remote address. */ ptr = strtok(NULL, " \t"); if (ptr) StrToAddr(ptr, &remoteAddr); else remoteAddr.s_addr = INADDR_ANY; /* * Create aliasing link. */ (void)LibAliasRedirectProto(mla, localAddr, remoteAddr, publicAddr, proto); free (buf); } void SetupAddressRedirect (const char* parms) { char *buf; char* ptr; char* separator; struct in_addr localAddr; struct in_addr publicAddr; char* serverPool; struct alias_link *aliaslink; buf = strdup (parms); if (!buf) errx (1, "redirect_port: strdup() failed"); /* * Extract local address. */ ptr = strtok (buf, " \t"); if (!ptr) errx (1, "redirect_address: missing local address"); separator = strchr(ptr, ','); if (separator) { /* LSNAT redirection syntax. */ localAddr.s_addr = INADDR_NONE; serverPool = ptr; } else { StrToAddr (ptr, &localAddr); serverPool = NULL; } /* * Extract public address. */ ptr = strtok (NULL, " \t"); if (!ptr) errx (1, "redirect_address: missing public address"); StrToAddr (ptr, &publicAddr); aliaslink = LibAliasRedirectAddr(mla, localAddr, publicAddr); /* * Setup LSNAT server pool. */ if (serverPool != NULL && aliaslink != NULL) { ptr = strtok(serverPool, ","); while (ptr != NULL) { StrToAddr(ptr, &localAddr); LibAliasAddServer(mla, aliaslink, localAddr, htons(~0)); ptr = strtok(NULL, ","); } } free (buf); } void StrToAddr (const char* str, struct in_addr* addr) { struct hostent* hp; if (inet_aton (str, addr)) return; hp = gethostbyname (str); if (!hp) errx (1, "unknown host %s", str); memcpy (addr, hp->h_addr, sizeof (struct in_addr)); } u_short StrToPort (const char* str, const char* proto) { u_short port; struct servent* sp; char* end; port = strtol (str, &end, 10); if (end != str) return htons (port); sp = getservbyname (str, proto); if (!sp) errx (1, "%s/%s: unknown service", str, proto); return sp->s_port; } int StrToPortRange (const char* str, const char* proto, port_range *portRange) { const char* sep; struct servent* sp; char* end; u_short loPort; u_short hiPort; /* First see if this is a service, return corresponding port if so. */ sp = getservbyname (str,proto); if (sp) { SETLOPORT(*portRange, ntohs(sp->s_port)); SETNUMPORTS(*portRange, 1); return 0; } /* Not a service, see if it's a single port or port range. */ sep = strchr (str, '-'); if (sep == NULL) { SETLOPORT(*portRange, strtol(str, &end, 10)); if (end != str) { /* Single port. */ SETNUMPORTS(*portRange, 1); return 0; } /* Error in port range field. */ errx (1, "%s/%s: unknown service", str, proto); } /* Port range, get the values and sanity check. */ sscanf (str, "%hu-%hu", &loPort, &hiPort); SETLOPORT(*portRange, loPort); SETNUMPORTS(*portRange, 0); /* Error by default */ if (loPort <= hiPort) SETNUMPORTS(*portRange, hiPort - loPort + 1); if (GETNUMPORTS(*portRange) == 0) errx (1, "invalid port range %s", str); return 0; } static int StrToProto (const char* str) { if (!strcmp (str, "tcp")) return IPPROTO_TCP; if (!strcmp (str, "udp")) return IPPROTO_UDP; errx (1, "unknown protocol %s. Expected tcp or udp", str); } static int StrToAddrAndPortRange (char* str, struct in_addr* addr, char* proto, port_range *portRange) { char* ptr; ptr = strchr (str, ':'); if (!ptr) errx (1, "%s is missing port number", str); *ptr = '\0'; ++ptr; StrToAddr (str, addr); return StrToPortRange (ptr, proto, portRange); } static void SetupPunchFW(const char *strValue) { unsigned int base, num; if (sscanf(strValue, "%u:%u", &base, &num) != 2) errx(1, "punch_fw: basenumber:count parameter required"); if (CheckIpfwRulenum(base + num - 1) == -1) errx(1, "punch_fw: basenumber:count parameter should fit " "the maximum allowed rule numbers"); LibAliasSetFWBase(mla, base, num); (void)LibAliasSetMode(mla, PKT_ALIAS_PUNCH_FW, PKT_ALIAS_PUNCH_FW); } static void SetupSkinnyPort(const char *strValue) { unsigned int port; if (sscanf(strValue, "%u", &port) != 1) errx(1, "skinny_port: port parameter required"); LibAliasSetSkinnyPort(mla, port); } static void NewInstance(const char *name) { struct instance *ip; LIST_FOREACH(ip, &root, list) { if (!strcmp(ip->name, name)) { mla = ip->la; mip = ip; return; } } ninstance++; ip = calloc(sizeof *ip, 1); ip->name = strdup(name); ip->la = LibAliasInit (ip->la); ip->assignAliasAddr = 0; ip->ifName = NULL; ip->logDropped = 0; ip->inPort = 0; ip->outPort = 0; ip->inOutPort = 0; ip->aliasAddr.s_addr = INADDR_NONE; ip->ifMTU = -1; ip->aliasOverhead = 12; LIST_INSERT_HEAD(&root, ip, list); mla = ip->la; mip = ip; } static int CheckIpfwRulenum(unsigned int rnum) { unsigned int default_rule; size_t len = sizeof(default_rule); if (sysctlbyname("net.inet.ip.fw.default_rule", &default_rule, &len, NULL, 0) == -1) { warn("Failed to get the default ipfw rule number, using " "default historical value 65535. The reason was"); default_rule = 65535; } if (rnum >= default_rule) { return -1; } return 0; } Index: projects/vnet/share/man/man4/tcp.4 =================================================================== --- projects/vnet/share/man/man4/tcp.4 (revision 302276) +++ projects/vnet/share/man/man4/tcp.4 (revision 302277) @@ -1,647 +1,669 @@ .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. .\" Copyright (c) 2010-2011 The FreeBSD Foundation .\" All rights reserved. .\" .\" Portions of this documentation were written at the Centre for Advanced .\" Internet Architectures, Swinburne University of Technology, Melbourne, .\" Australia by David Hayes under sponsorship from the FreeBSD Foundation. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd May 19, 2016 +.Dd June 28, 2016 .Dt TCP 4 .Os .Sh NAME .Nm tcp .Nd Internet Transmission Control Protocol .Sh SYNOPSIS .In sys/types.h .In sys/socket.h .In netinet/in.h .In netinet/tcp.h .Ft int .Fn socket AF_INET SOCK_STREAM 0 .Sh DESCRIPTION The .Tn TCP protocol provides reliable, flow-controlled, two-way transmission of data. It is a byte-stream protocol used to support the .Dv SOCK_STREAM abstraction. .Tn TCP uses the standard Internet address format and, in addition, provides a per-host collection of .Dq "port addresses" . Thus, each address is composed of an Internet address specifying the host and network, with a specific .Tn TCP port on the host identifying the peer entity. .Pp Sockets utilizing the .Tn TCP protocol are either .Dq active or .Dq passive . Active sockets initiate connections to passive sockets. By default, .Tn TCP sockets are created active; to create a passive socket, the .Xr listen 2 system call must be used after binding the socket with the .Xr bind 2 system call. Only passive sockets may use the .Xr accept 2 call to accept incoming connections. Only active sockets may use the .Xr connect 2 call to initiate connections. .Pp Passive sockets may .Dq underspecify their location to match incoming connection requests from multiple networks. This technique, termed .Dq "wildcard addressing" , allows a single server to provide service to clients on multiple networks. To create a socket which listens on all networks, the Internet address .Dv INADDR_ANY must be bound. The .Tn TCP port may still be specified at this time; if the port is not specified, the system will assign one. Once a connection has been established, the socket's address is fixed by the peer entity's location. The address assigned to the socket is the address associated with the network interface through which packets are being transmitted and received. Normally, this address corresponds to the peer entity's network. .Pp .Tn TCP supports a number of socket options which can be set with .Xr setsockopt 2 and tested with .Xr getsockopt 2 : -.Bl -tag -width ".Dv TCP_CONGESTION" +.Bl -tag -width ".Dv TCP_FUNCTION_BLK" .It Dv TCP_INFO Information about a socket's underlying TCP session may be retrieved by passing the read-only option .Dv TCP_INFO to .Xr getsockopt 2 . It accepts a single argument: a pointer to an instance of .Vt "struct tcp_info" . .Pp This API is subject to change; consult the source to determine which fields are currently filled out by this option. .Fx specific additions include send window size, receive window size, and bandwidth-controlled window space. .It Dv TCP_CCALGOOPT Set or query congestion control algorithm specific parameters. See .Xr mod_cc 4 for details. .It Dv TCP_CONGESTION Select or query the congestion control algorithm that TCP will use for the connection. See .Xr mod_cc 4 for details. +.It Dv TCP_FUNCTION_BLK +Select or query the set of functions that TCP will use for this connection. +This allows a user to select an alternate TCP stack. +The alternate TCP stack must already be loaded in the kernel. +To list the available TCP stacks, see +.Va functions_available +in the +.Sx MIB Variables +section further down. +To list the default TCP stack, see +.Va functions_default +in the +.Sx MIB Variables +section. .It Dv TCP_KEEPINIT This .Xr setsockopt 2 option accepts a per-socket timeout argument of .Vt "u_int" in seconds, for new, non-established .Tn TCP connections. For the global default in milliseconds see .Va keepinit in the .Sx MIB Variables section further down. .It Dv TCP_KEEPIDLE This .Xr setsockopt 2 option accepts an argument of .Vt "u_int" for the amount of time, in seconds, that the connection must be idle before keepalive probes (if enabled) are sent for the connection of this socket. If set on a listening socket, the value is inherited by the newly created socket upon .Xr accept 2 . For the global default in milliseconds see .Va keepidle in the .Sx MIB Variables section further down. .It Dv TCP_KEEPINTVL This .Xr setsockopt 2 option accepts an argument of .Vt "u_int" to set the per-socket interval, in seconds, between keepalive probes sent to a peer. If set on a listening socket, the value is inherited by the newly created socket upon .Xr accept 2 . For the global default in milliseconds see .Va keepintvl in the .Sx MIB Variables section further down. .It Dv TCP_KEEPCNT This .Xr setsockopt 2 option accepts an argument of .Vt "u_int" and allows a per-socket tuning of the number of probes sent, with no response, before the connection will be dropped. If set on a listening socket, the value is inherited by the newly created socket upon .Xr accept 2 . For the global default see the .Va keepcnt in the .Sx MIB Variables section further down. .It Dv TCP_NODELAY Under most circumstances, .Tn TCP sends data when it is presented; when outstanding data has not yet been acknowledged, it gathers small amounts of output to be sent in a single packet once an acknowledgement is received. For a small number of clients, such as window systems that send a stream of mouse events which receive no replies, this packetization may cause significant delays. The boolean option .Dv TCP_NODELAY defeats this algorithm. .It Dv TCP_MAXSEG By default, a sender- and .No receiver- Ns Tn TCP will negotiate among themselves to determine the maximum segment size to be used for each connection. The .Dv TCP_MAXSEG option allows the user to determine the result of this negotiation, and to reduce it if desired. .It Dv TCP_NOOPT .Tn TCP usually sends a number of options in each packet, corresponding to various .Tn TCP extensions which are provided in this implementation. The boolean option .Dv TCP_NOOPT is provided to disable .Tn TCP option use on a per-connection basis. .It Dv TCP_NOPUSH By convention, the .No sender- Ns Tn TCP will set the .Dq push bit, and begin transmission immediately (if permitted) at the end of every user call to .Xr write 2 or .Xr writev 2 . When this option is set to a non-zero value, .Tn TCP will delay sending any data at all until either the socket is closed, or the internal send buffer is filled. .It Dv TCP_MD5SIG This option enables the use of MD5 digests (also known as TCP-MD5) on writes to the specified socket. Outgoing traffic is digested; digests on incoming traffic are verified if the .Va net.inet.tcp.signature_verify_input sysctl is nonzero. The current default behavior for the system is to respond to a system advertising this option with TCP-MD5; this may change. .Pp One common use for this in a .Fx router deployment is to enable based routers to interwork with Cisco equipment at peering points. Support for this feature conforms to RFC 2385. Only IPv4 .Pq Dv AF_INET sessions are supported. .Pp In order for this option to function correctly, it is necessary for the administrator to add a tcp-md5 key entry to the system's security associations database (SADB) using the .Xr setkey 8 utility. This entry must have an SPI of 0x1000 and can therefore only be specified on a per-host basis at this time. .Pp If an SADB entry cannot be found for the destination, the outgoing traffic will have an invalid digest option prepended, and the following error message will be visible on the system console: .Em "tcp_signature_compute: SADB lookup failed for %d.%d.%d.%d" . .El .Pp The option level for the .Xr setsockopt 2 call is the protocol number for .Tn TCP , available from .Xr getprotobyname 3 , or .Dv IPPROTO_TCP . All options are declared in .In netinet/tcp.h . .Pp Options at the .Tn IP transport level may be used with .Tn TCP ; see .Xr ip 4 . Incoming connection requests that are source-routed are noted, and the reverse source route is used in responding. .Pp The default congestion control algorithm for .Tn TCP is .Xr cc_newreno 4 . Other congestion control algorithms can be made available using the .Xr mod_cc 4 framework. .Ss MIB Variables The .Tn TCP protocol implements a number of variables in the .Va net.inet.tcp branch of the .Xr sysctl 3 MIB. .Bl -tag -width ".Va TCPCTL_DO_RFC1323" .It Dv TCPCTL_DO_RFC1323 .Pq Va rfc1323 Implement the window scaling and timestamp options of RFC 1323 (default is true). .It Dv TCPCTL_MSSDFLT .Pq Va mssdflt The default value used for the maximum segment size .Pq Dq MSS when no advice to the contrary is received from MSS negotiation. .It Dv TCPCTL_SENDSPACE .Pq Va sendspace Maximum .Tn TCP send window. .It Dv TCPCTL_RECVSPACE .Pq Va recvspace Maximum .Tn TCP receive window. .It Va log_in_vain Log any connection attempts to ports where there is not a socket accepting connections. The value of 1 limits the logging to .Tn SYN (connection establishment) packets only. That of 2 results in any .Tn TCP packets to closed ports being logged. Any value unlisted above disables the logging (default is 0, i.e., the logging is disabled). .It Va msl The Maximum Segment Lifetime, in milliseconds, for a packet. .It Va keepinit Timeout, in milliseconds, for new, non-established .Tn TCP connections. The default is 75000 msec. .It Va keepidle Amount of time, in milliseconds, that the connection must be idle before keepalive probes (if enabled) are sent. The default is 7200000 msec (2 hours). .It Va keepintvl The interval, in milliseconds, between keepalive probes sent to remote machines, when no response is received on a .Va keepidle probe. The default is 75000 msec. .It Va keepcnt Number of probes sent, with no response, before a connection is dropped. The default is 8 packets. .It Va always_keepalive Assume that .Dv SO_KEEPALIVE is set on all .Tn TCP connections, the kernel will periodically send a packet to the remote host to verify the connection is still up. .It Va icmp_may_rst Certain .Tn ICMP unreachable messages may abort connections in .Tn SYN-SENT state. .It Va do_tcpdrain Flush packets in the .Tn TCP reassembly queue if the system is low on mbufs. .It Va blackhole If enabled, disable sending of RST when a connection is attempted to a port where there is not a socket accepting connections. See .Xr blackhole 4 . .It Va delayed_ack Delay ACK to try and piggyback it onto a data packet. .It Va delacktime Maximum amount of time, in milliseconds, before a delayed ACK is sent. .It Va path_mtu_discovery Enable Path MTU Discovery. .It Va tcbhashsize Size of the .Tn TCP control-block hash table (read-only). This may be tuned using the kernel option .Dv TCBHASHSIZE or by setting .Va net.inet.tcp.tcbhashsize in the .Xr loader 8 . .It Va pcbcount Number of active process control blocks (read-only). .It Va syncookies Determines whether or not .Tn SYN cookies should be generated for outbound .Tn SYN-ACK packets. .Tn SYN cookies are a great help during .Tn SYN flood attacks, and are enabled by default. (See .Xr syncookies 4 . ) .It Va isn_reseed_interval The interval (in seconds) specifying how often the secret data used in RFC 1948 initial sequence number calculations should be reseeded. By default, this variable is set to zero, indicating that no reseeding will occur. Reseeding should not be necessary, and will break .Dv TIME_WAIT recycling for a few minutes. .It Va rexmit_min , rexmit_slop Adjust the retransmit timer calculation for .Tn TCP . The slop is typically added to the raw calculation to take into account occasional variances that the .Tn SRTT (smoothed round-trip time) is unable to accommodate, while the minimum specifies an absolute minimum. While a number of .Tn TCP RFCs suggest a 1 second minimum, these RFCs tend to focus on streaming behavior, and fail to deal with the fact that a 1 second minimum has severe detrimental effects over lossy interactive connections, such as a 802.11b wireless link, and over very fast but lossy connections for those cases not covered by the fast retransmit code. For this reason, we use 200ms of slop and a near-0 minimum, which gives us an effective minimum of 200ms (similar to .Tn Linux ) . .It Va initcwnd_segments Enable the ability to specify initial congestion window in number of segments. The default value is 10 as suggested by RFC 6928. Changing the value on fly would not affect connections using congestion window from the hostcache. Caution: This regulates the burst of packets allowed to be sent in the first RTT. The value should be relative to the link capacity. Start with small values for lower-capacity links. Large bursts can cause buffer overruns and packet drops if routers have small buffers or the link is experiencing congestion. .It Va rfc3042 Enable the Limited Transmit algorithm as described in RFC 3042. It helps avoid timeouts on lossy links and also when the congestion window is small, as happens on short transfers. .It Va rfc3390 Enable support for RFC 3390, which allows for a variable-sized starting congestion window on new connections, depending on the maximum segment size. This helps throughput in general, but particularly affects short transfers and high-bandwidth large propagation-delay connections. .It Va sack.enable Enable support for RFC 2018, TCP Selective Acknowledgment option, which allows the receiver to inform the sender about all successfully arrived segments, allowing the sender to retransmit the missing segments only. .It Va sack.maxholes Maximum number of SACK holes per connection. Defaults to 128. .It Va sack.globalmaxholes Maximum number of SACK holes per system, across all connections. Defaults to 65536. .It Va maxtcptw When a TCP connection enters the .Dv TIME_WAIT state, its associated socket structure is freed, since it is of negligible size and use, and a new structure is allocated to contain a minimal amount of information necessary for sustaining a connection in this state, called the compressed TCP TIME_WAIT state. Since this structure is smaller than a socket structure, it can save a significant amount of system memory. The .Va net.inet.tcp.maxtcptw MIB variable controls the maximum number of these structures allocated. By default, it is initialized to .Va kern.ipc.maxsockets / 5. .It Va nolocaltimewait Suppress creating of compressed TCP TIME_WAIT states for connections in which both endpoints are local. .It Va fast_finwait2_recycle Recycle .Tn TCP .Dv FIN_WAIT_2 connections faster when the socket is marked as .Dv SBS_CANTRCVMORE (no user process has the socket open, data received on the socket cannot be read). The timeout used here is .Va finwait2_timeout . .It Va finwait2_timeout Timeout to use for fast recycling of .Tn TCP .Dv FIN_WAIT_2 connections. Defaults to 60 seconds. .It Va ecn.enable Enable support for TCP Explicit Congestion Notification (ECN). ECN allows a TCP sender to reduce the transmission rate in order to avoid packet drops. Settings: .Bl -tag -compact .It 0 Disable ECN. .It 1 Allow incoming connections to request ECN. Outgoing connections will request ECN. .It 2 Allow incoming connections to request ECN. Outgoing connections will not request ECN. .El .It Va ecn.maxretries Number of retries (SYN or SYN/ACK retransmits) before disabling ECN on a specific connection. This is needed to help with connection establishment when a broken firewall is in the network path. .It Va pmtud_blackhole_detection Turn on automatic path MTU blackhole detection. In case of retransmits OS will lower the MSS to check if it's MTU problem. If current MSS is greater than configured value to try, it will be set to configured value, otherwise, MSS will be set to default values .Po Va net.inet.tcp.mssdflt and .Va net.inet.tcp.v6mssdflt .Pc . .It Va pmtud_blackhole_mss MSS to try for IPv4 if PMTU blackhole detection is turned on. .It Va v6pmtud_blackhole_mss MSS to try for IPv6 if PMTU blackhole detection is turned on. .It Va pmtud_blackhole_activated Number of times configured values were used in an attempt to downshift. .It Va pmtud_blackhole_activated_min_mss Number of times default MSS was used in an attempt to downshift. .It Va pmtud_blackhole_failed Number of connections for which retransmits continued even after MSS downshift. +.It Va functions_available +List of available TCP function blocks (TCP stacks). +.It Va functions_default +The default TCP function block (TCP stack). .El .Sh ERRORS A socket operation may fail with one of the following errors returned: .Bl -tag -width Er .It Bq Er EISCONN when trying to establish a connection on a socket which already has one; .It Bq Er ENOBUFS when the system runs out of memory for an internal data structure; .It Bq Er ETIMEDOUT when a connection was dropped due to excessive retransmissions; .It Bq Er ECONNRESET when the remote peer forces the connection to be closed; .It Bq Er ECONNREFUSED when the remote peer actively refuses connection establishment (usually because no process is listening to the port); .It Bq Er EADDRINUSE when an attempt is made to create a socket with a port which has already been allocated; .It Bq Er EADDRNOTAVAIL when an attempt is made to create a socket with a network address for which no network interface exists; .It Bq Er EAFNOSUPPORT when an attempt is made to bind or connect a socket to a multicast address. +.It Bq Er EINVAL +when trying to change TCP function blocks at an invalid point in the session; +.It Bq Er ENOENT +when trying to use a TCP function block that is not available; .El .Sh SEE ALSO .Xr getsockopt 2 , .Xr socket 2 , .Xr sysctl 3 , .Xr blackhole 4 , .Xr inet 4 , .Xr intro 4 , .Xr ip 4 , .Xr mod_cc 4 , .Xr siftr 4 , .Xr syncache 4 , .Xr setkey 8 .Rs .%A "V. Jacobson" .%A "R. Braden" .%A "D. Borman" .%T "TCP Extensions for High Performance" .%O "RFC 1323" .Re .Rs .%A "A. Heffernan" .%T "Protection of BGP Sessions via the TCP MD5 Signature Option" .%O "RFC 2385" .Re .Rs .%A "K. Ramakrishnan" .%A "S. Floyd" .%A "D. Black" .%T "The Addition of Explicit Congestion Notification (ECN) to IP" .%O "RFC 3168" .Re .Sh HISTORY The .Tn TCP protocol appeared in .Bx 4.2 . The RFC 1323 extensions for window scaling and timestamps were added in .Bx 4.4 . The .Dv TCP_INFO option was introduced in .Tn Linux 2.6 and is .Em subject to change . Index: projects/vnet/share/man/man5/src.conf.5 =================================================================== --- projects/vnet/share/man/man5/src.conf.5 (revision 302276) +++ projects/vnet/share/man/man5/src.conf.5 (revision 302277) @@ -1,1632 +1,1635 @@ .\" DO NOT EDIT-- this file is automatically generated. .\" from FreeBSD: head/tools/build/options/makeman 292283 2015-12-15 18:42:30Z bdrewery .\" $FreeBSD$ -.Dd June 24, 2016 +.Dd June 28, 2016 .Dt SRC.CONF 5 .Os .Sh NAME .Nm src.conf .Nd "source build options" .Sh DESCRIPTION The .Nm file contains settings that will apply to every build involving the .Fx source tree; see .Xr build 7 . .Pp The .Nm file uses the standard makefile syntax. However, .Nm should not specify any dependencies to .Xr make 1 . Instead, .Nm is to set .Xr make 1 variables that control the aspects of how the system builds. .Pp The default location of .Nm is .Pa /etc/src.conf , though an alternative location can be specified in the .Xr make 1 variable .Va SRCCONF . Overriding the location of .Nm may be necessary if the system-wide settings are not suitable for a particular build. For instance, setting .Va SRCCONF to .Pa /dev/null effectively resets all build controls to their defaults. .Pp The only purpose of .Nm is to control the compilation of the .Fx source code, which is usually located in .Pa /usr/src . As a rule, the system administrator creates .Nm when the values of certain control variables need to be changed from their defaults. .Pp In addition, control variables can be specified for a particular build via the .Fl D option of .Xr make 1 or in its environment; see .Xr environ 7 . .Pp The environment of .Xr make 1 for the build can be controlled via the .Va SRC_ENV_CONF variable, which defaults to .Pa /etc/src-env.conf . Some examples that may only be set in this file are .Va WITH_DIRDEPS_BUILD , and .Va WITH_META_MODE as they are environment-only variables. Note that .Va MAKEOBJDIRPREFIX may be set here only when using .Va WITH_DIRDEPS_BUILD . .Pp The values of variables are ignored regardless of their setting; even if they would be set to .Dq Li FALSE or .Dq Li NO . Just the existence of an option will cause it to be honoured by .Xr make 1 . .Pp The following list provides a name and short description for variables that can be used for source builds. .Bl -tag -width indent .It Va WITHOUT_ACCT .\" from FreeBSD: head/tools/build/options/WITHOUT_ACCT 223201 2011-06-17 20:47:44Z ed Set to not build process accounting tools such as .Xr accton 8 and .Xr sa 8 . .It Va WITHOUT_ACPI .\" from FreeBSD: head/tools/build/options/WITHOUT_ACPI 156932 2006-03-21 07:50:50Z ru Set to not build .Xr acpiconf 8 , .Xr acpidump 8 and related programs. .It Va WITHOUT_AMD .\" from FreeBSD: head/tools/build/options/WITHOUT_AMD 183242 2008-09-21 22:02:26Z sam Set to not build .Xr amd 8 , and related programs. .It Va WITHOUT_APM .\" from FreeBSD: head/tools/build/options/WITHOUT_APM 183242 2008-09-21 22:02:26Z sam Set to not build .Xr apm 8 , .Xr apmd 8 and related programs. .It Va WITHOUT_ASSERT_DEBUG .\" from FreeBSD: head/tools/build/options/WITHOUT_ASSERT_DEBUG 162215 2006-09-11 13:55:27Z ru Set to compile programs and libraries without the .Xr assert 3 checks. .It Va WITHOUT_AT .\" from FreeBSD: head/tools/build/options/WITHOUT_AT 183242 2008-09-21 22:02:26Z sam Set to not build .Xr at 1 and related utilities. .It Va WITHOUT_ATM .\" from FreeBSD: head/tools/build/options/WITHOUT_ATM 156932 2006-03-21 07:50:50Z ru Set to not build programs and libraries related to ATM networking. .It Va WITHOUT_AUDIT .\" from FreeBSD: head/tools/build/options/WITHOUT_AUDIT 156932 2006-03-21 07:50:50Z ru Set to not build audit support into system programs. .It Va WITHOUT_AUTHPF .\" from FreeBSD: head/tools/build/options/WITHOUT_AUTHPF 156932 2006-03-21 07:50:50Z ru Set to not build .Xr authpf 8 . .It Va WITHOUT_AUTOFS .\" from FreeBSD: head/tools/build/options/WITHOUT_AUTOFS 296264 2016-03-01 11:36:10Z trasz Set to not build .Xr autofs 5 related programs, libraries, and kernel modules. .It Va WITH_AUTO_OBJ .\" from FreeBSD: head/tools/build/options/WITH_AUTO_OBJ 284708 2015-06-22 20:21:57Z sjg Enable automatic creation of objdirs. .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_BHYVE .\" from FreeBSD: head/tools/build/options/WITHOUT_BHYVE 277727 2015-01-26 06:44:48Z ngie Set to not build or install .Xr bhyve 8 , associated utilities, and examples. .Pp This option only affects amd64/amd64. .It Va WITHOUT_BINUTILS .\" from FreeBSD: head/tools/build/options/WITHOUT_BINUTILS 286332 2015-08-05 18:30:00Z emaste Set to not build or install binutils (as, ld, objcopy, and objdump ) as part of the normal system build. The resulting system cannot build programs from source. .Pp It is a default setting on arm64/aarch64. .It Va WITH_BINUTILS .\" from FreeBSD: head/tools/build/options/WITH_BINUTILS 295491 2016-02-11 00:14:00Z emaste Set to build and install binutils (as, ld, objcopy, and objdump) as part of the normal system build. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_BINUTILS_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITHOUT_BINUTILS_BOOTSTRAP 295490 2016-02-10 23:57:09Z emaste Set to not build binutils (as, ld, objcopy and objdump) as part of the bootstrap process. .Bf -symbolic The option does not work for build targets unless some alternative toolchain is provided. .Ef .Pp It is a default setting on arm64/aarch64. .It Va WITH_BINUTILS_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITH_BINUTILS_BOOTSTRAP 295491 2016-02-11 00:14:00Z emaste Set build binutils (as, ld, objcopy and objdump) as part of the bootstrap process. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_BLACKLIST .\" from FreeBSD: head/tools/build/options/WITHOUT_BLACKLIST 301554 2016-06-07 16:35:55Z lidl Set this if you do not want to build blacklistd / blacklistctl. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_BLACKLIST_SUPPORT .El .It Va WITHOUT_BLACKLIST_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_BLACKLIST_SUPPORT 301554 2016-06-07 16:35:55Z lidl Set to build some programs without blacklistd support, like .Xr fingerd 8 , .Xr ftpd 8 , .Xr rlogind 8 , .Xr rshd 8 , and .Xr sshd 8 . .It Va WITHOUT_BLUETOOTH .\" from FreeBSD: head/tools/build/options/WITHOUT_BLUETOOTH 156932 2006-03-21 07:50:50Z ru Set to not build Bluetooth related kernel modules, programs and libraries. .It Va WITHOUT_BOOT .\" from FreeBSD: head/tools/build/options/WITHOUT_BOOT 156932 2006-03-21 07:50:50Z ru Set to not build the boot blocks and loader. .It Va WITHOUT_BOOTPARAMD .\" from FreeBSD: head/tools/build/options/WITHOUT_BOOTPARAMD 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr bootparamd 8 . .It Va WITHOUT_BOOTPD .\" from FreeBSD: head/tools/build/options/WITHOUT_BOOTPD 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr bootpd 8 . .It Va WITHOUT_BSDINSTALL .\" from FreeBSD: head/tools/build/options/WITHOUT_BSDINSTALL 277677 2015-01-25 04:43:13Z ngie Set to not build .Xr bsdinstall 8 , .Xr sade 8 , and related programs. .It Va WITHOUT_BSD_CPIO .\" from FreeBSD: head/tools/build/options/WITHOUT_BSD_CPIO 179813 2008-06-16 05:48:15Z dougb Set to not build the BSD licensed version of cpio based on .Xr libarchive 3 . .It Va WITH_BSD_GREP .\" from FreeBSD: head/tools/build/options/WITH_BSD_GREP 222273 2011-05-25 01:04:12Z obrien Install BSD-licensed grep as '[ef]grep' instead of GNU grep. .It Va WITHOUT_BSNMP .\" from FreeBSD: head/tools/build/options/WITHOUT_BSNMP 183306 2008-09-23 16:15:42Z sam Set to not build or install .Xr bsnmpd 1 and related libraries and data files. .It Va WITHOUT_BZIP2 .\" from FreeBSD: head/tools/build/options/WITHOUT_BZIP2 174550 2007-12-12 16:43:17Z ru Set to not build contributed bzip2 software as a part of the base system. .Bf -symbolic The option has no effect yet. .Ef When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_BZIP2_SUPPORT .El .It Va WITHOUT_BZIP2_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_BZIP2_SUPPORT 166255 2007-01-26 10:19:08Z delphij Set to build some programs without optional bzip2 support. .It Va WITHOUT_CALENDAR .\" from FreeBSD: head/tools/build/options/WITHOUT_CALENDAR 156932 2006-03-21 07:50:50Z ru Set to not build .Xr calendar 1 . .It Va WITHOUT_CAPSICUM .\" from FreeBSD: head/tools/build/options/WITHOUT_CAPSICUM 229319 2012-01-02 21:57:58Z rwatson Set to not build Capsicum support into system programs. .It Va WITHOUT_CASPER .\" from FreeBSD: head/tools/build/options/WITHOUT_CASPER 258838 2013-12-02 08:21:28Z pjd Set to not build Casper program and related libraries. .It Va WITH_CCACHE_BUILD .\" from FreeBSD: head/tools/build/options/WITH_CCACHE_BUILD 297436 2016-03-30 23:53:12Z bdrewery Set to use .Xr ccache 1 for the build. No configuration is required except to install the .Sy devel/ccache package. Using with .Xr distcc 1 should set .Sy CCACHE_PREFIX=/usr/local/bin/distcc . The default cache directory of .Pa $HOME/.ccache will be used, which can be overridden by setting .Sy CCACHE_DIR . The .Sy CCACHE_COMPILERCHECK option defaults to .Sy content when using the in-tree bootstrap compiler, and .Sy mtime when using an external compiler. The .Sy CCACHE_CPP2 option is used for Clang but not GCC. .Pp Sharing a cache between multiple work directories requires using a layout similar to .Pa /some/prefix/src .Pa /some/prefix/obj and an environment such as: .Bd -literal -offset indent CCACHE_BASEDIR='${SRCTOP:H}' MAKEOBJDIRPREFIX='${SRCTOP:H}/obj' .Ed .Pp See .Xr ccache 1 for more configuration options. .It Va WITHOUT_CCD .\" from FreeBSD: head/tools/build/options/WITHOUT_CCD 277678 2015-01-25 04:52:48Z ngie Set to not build .Xr geom_ccd 4 and related utilities. .It Va WITHOUT_CDDL .\" from FreeBSD: head/tools/build/options/WITHOUT_CDDL 163861 2006-11-01 09:02:11Z jb Set to not build code licensed under Sun's CDDL. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_CTF .It .Va WITHOUT_ZFS .El .It Va WITHOUT_CLANG .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG 264660 2014-04-18 17:03:58Z imp Set to not build the Clang C/C++ compiler during the regular phase of the build. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32 and sparc64/sparc64. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .El .It Va WITH_CLANG .\" from FreeBSD: head/tools/build/options/WITH_CLANG 264660 2014-04-18 17:03:58Z imp Set to build the Clang C/C++ compiler during the normal phase of the build. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, pc98/i386, powerpc/powerpc and powerpc/powerpc64. .It Va WITHOUT_CLANG_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG_BOOTSTRAP 273177 2014-10-16 18:28:11Z skreuzer Set to not build the Clang C/C++ compiler during the bootstrap phase of the build. You must enable either gcc or clang bootstrap to be able to build the system, unless an alternative compiler is provided via XCC. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_CLANG_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITH_CLANG_BOOTSTRAP 264660 2014-04-18 17:03:58Z imp Set to build the Clang C/C++ compiler during the bootstrap phase of the build. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386 and pc98/i386. .It Va WITH_CLANG_EXTRAS .\" from FreeBSD: head/tools/build/options/WITH_CLANG_EXTRAS 231057 2012-02-05 23:56:22Z dim Set to build additional clang and llvm tools, such as bugpoint. .It Va WITHOUT_CLANG_FULL .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG_FULL 246259 2013-02-02 22:28:29Z dim Set to avoid building the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32 and sparc64/sparc64. .It Va WITH_CLANG_FULL .\" from FreeBSD: head/tools/build/options/WITH_CLANG_FULL 246259 2013-02-02 22:28:29Z dim Set to build the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, pc98/i386, powerpc/powerpc and powerpc/powerpc64. .It Va WITHOUT_CLANG_IS_CC .\" from FreeBSD: head/tools/build/options/WITHOUT_CLANG_IS_CC 242629 2012-11-05 21:53:23Z brooks Set to install the GCC compiler as .Pa /usr/bin/cc , .Pa /usr/bin/c++ and .Pa /usr/bin/cpp . .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_CLANG_IS_CC .\" from FreeBSD: head/tools/build/options/WITH_CLANG_IS_CC 235342 2012-05-12 16:12:36Z gjb Set to install the Clang C/C++ compiler as .Pa /usr/bin/cc , .Pa /usr/bin/c++ and .Pa /usr/bin/cpp . .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386 and pc98/i386. .It Va WITHOUT_CPP .\" from FreeBSD: head/tools/build/options/WITHOUT_CPP 156932 2006-03-21 07:50:50Z ru Set to not build .Xr cpp 1 . .It Va WITHOUT_CROSS_COMPILER .\" from FreeBSD: head/tools/build/options/WITHOUT_CROSS_COMPILER 275138 2014-11-26 20:43:09Z gjb Set to not build any cross compiler in the cross-tools stage of buildworld. If you are compiling a different version of .Fx than what is installed on the system, you will need to provide an alternate compiler with XCC to ensure success. If you are compiling with an identical version of .Fx to the host, this option may be safely used. This option may also be safe when the host version of .Fx is close to the sources being built, but all bets are off if there have been any changes to the toolchain between the versions. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_BINUTILS_BOOTSTRAP .It .Va WITHOUT_CLANG_BOOTSTRAP .It .Va WITHOUT_ELFTOOLCHAIN_BOOTSTRAP .It .Va WITHOUT_GCC_BOOTSTRAP .El .It Va WITHOUT_CRYPT .\" from FreeBSD: head/tools/build/options/WITHOUT_CRYPT 156932 2006-03-21 07:50:50Z ru Set to not build any crypto code. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS .It .Va WITHOUT_KERBEROS_SUPPORT .It .Va WITHOUT_OPENSSH .It .Va WITHOUT_OPENSSL .El .Pp When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITH_CTF .\" from FreeBSD: head/tools/build/options/WITH_CTF 228159 2011-11-30 18:22:44Z fjoe Set to compile with CTF (Compact C Type Format) data. CTF data encapsulates a reduced form of debugging information similar to DWARF and the venerable stabs and is required for DTrace. .It Va WITHOUT_CTM .\" from FreeBSD: head/tools/build/options/WITHOUT_CTM 183242 2008-09-21 22:02:26Z sam Set to not build .Xr ctm 1 and related utilities. .It Va WITHOUT_CUSE .\" from FreeBSD: head/tools/build/options/WITHOUT_CUSE 270171 2014-08-19 15:40:26Z hselasky Set to not build CUSE-related programs and libraries. .It Va WITHOUT_CXX .\" from FreeBSD: head/tools/build/options/WITHOUT_CXX 281053 2015-04-03 23:55:04Z bdrewery Set to not build .Xr c++ 1 and related libraries. It will also prevent building of .Xr gperf 1 and .Xr devd 8 . When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_CLANG .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_GNUCXX .It .Va WITHOUT_GROFF .El .It Va WITHOUT_DEBUG_FILES .\" from FreeBSD: head/tools/build/options/WITHOUT_DEBUG_FILES 290059 2015-10-27 20:49:56Z emaste Set to avoid building or installing standalone debug files for each executable binary and shared library. .It Va WITHOUT_DICT .\" from FreeBSD: head/tools/build/options/WITHOUT_DICT 156932 2006-03-21 07:50:50Z ru Set to not build the Webster dictionary files. .It Va WITH_DIRDEPS_BUILD .\" from FreeBSD: head/tools/build/options/WITH_DIRDEPS_BUILD 298007 2016-04-14 22:00:49Z bdrewery This is an experimental build system. For details see http://www.crufty.net/sjg/docs/freebsd-meta-mode.htm. Build commands can be seen from the top-level with: .Dl make show-valid-targets The build is driven by dirdeps.mk using .Va DIRDEPS stored in Makefile.depend files found in each directory. .Pp The build can be started from anywhere, and behaves the same. The initial instance of .Xr make 1 recursively reads .Va DIRDEPS from Makefile.depend computing a graph of tree dependencies from the current origin. Setting .Va NO_DIRDEPS will skip checking dirdep dependencies and will only build in the current and child directories. .Va NO_DIRDEPS_BELOW will skip building any dirdeps and only build the current directory. .Pp This also utilizes the .Va WITH_META_MODE logic for incremental builds. .Pp The build will hide commands ran unless .Va NO_SILENT is defined. .Pp Note that there is currently no mass install feature for this. .Pp When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITH_INSTALL_AS_USER .El .Pp When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_SYSTEM_COMPILER (unless .Va WITH_SYSTEM_COMPILER is set explicitly) .It Va WITH_AUTO_OBJ (unless .Va WITHOUT_AUTO_OBJ is set explicitly) .It Va WITH_META_MODE (unless .Va WITHOUT_META_MODE is set explicitly) .It Va WITH_STAGING (unless .Va WITHOUT_STAGING is set explicitly) .It Va WITH_STAGING_MAN (unless .Va WITHOUT_STAGING_MAN is set explicitly) .It Va WITH_STAGING_PROG (unless .Va WITHOUT_STAGING_PROG is set explicitly) .It Va WITH_SYSROOT (unless .Va WITHOUT_SYSROOT is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_DIRDEPS_CACHE .\" from FreeBSD: head/tools/build/options/WITH_DIRDEPS_CACHE 290816 2015-11-14 03:24:48Z sjg Cache result of dirdeps.mk which can save significant time for subsequent builds. Depends on .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_DMAGENT .\" from FreeBSD: head/tools/build/options/WITHOUT_DMAGENT 262335 2014-02-22 13:05:23Z bapt Set to not build dma Mail Transport Agent .It Va WITHOUT_DOCCOMPRESS .\" from FreeBSD: head/tools/build/options/WITHOUT_DOCCOMPRESS 266752 2014-05-27 15:52:27Z gjb Set to not to install compressed system documentation. Only the uncompressed version will be installed. .It Va WITH_DTRACE_TESTS .\" from FreeBSD: head/tools/build/options/WITH_DTRACE_TESTS 286174 2015-08-02 00:37:33Z markj Set to build and install the DTrace test suite in .Pa /usr/tests/cddl/usr.sbin/dtrace . This test suite is considered experimental on architectures other than amd64/amd64 and running it may cause system instability. .It Va WITHOUT_DYNAMICROOT .\" from FreeBSD: head/tools/build/options/WITHOUT_DYNAMICROOT 156932 2006-03-21 07:50:50Z ru Set this if you do not want to link .Pa /bin and .Pa /sbin dynamically. .It Va WITHOUT_ED_CRYPTO .\" from FreeBSD: head/tools/build/options/WITHOUT_ED_CRYPTO 235660 2012-05-19 20:05:27Z marcel Set to build .Xr ed 1 without support for encryption/decryption. .It Va WITHOUT_EE .\" from FreeBSD: head/tools/build/options/WITHOUT_EE 277663 2015-01-25 00:03:44Z ngie Set to not build and install .Xr edit 1 , .Xr ee 1 , and related programs. .It Va WITH_EISA .\" from FreeBSD: head/tools/build/options/WITH_EISA 264654 2014-04-18 16:53:06Z imp Set to build EISA kernel modules. .It Va WITHOUT_ELFCOPY_AS_OBJCOPY .\" from FreeBSD: head/tools/build/options/WITHOUT_ELFCOPY_AS_OBJCOPY 296193 2016-02-29 16:39:38Z emaste Set to build and install .Xr objcopy 1 from GNU Binutils, instead of the one from ELF Tool Chain. This option is provided as a transition aid and will be removed in due time. .It Va WITHOUT_ELFTOOLCHAIN_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITHOUT_ELFTOOLCHAIN_BOOTSTRAP 295491 2016-02-11 00:14:00Z emaste Set to not build ELF Tool Chain tools (addr2line, nm, size, strings and strip) as part of the bootstrap process. .Bf -symbolic An alternate bootstrap tool chain must be provided. .Ef .It Va WITHOUT_EXAMPLES .\" from FreeBSD: head/tools/build/options/WITHOUT_EXAMPLES 156938 2006-03-21 09:06:24Z ru Set to avoid installing examples to .Pa /usr/share/examples/ . +.It Va WITH_EXTRA_TCP_STACKS +.\" from FreeBSD: head/tools/build/options/WITH_EXTRA_TCP_STACKS 302247 2016-06-28 13:37:01Z jtl +Set to build extra TCP stack modules. .It Va WITHOUT_FDT .\" from FreeBSD: head/tools/build/options/WITHOUT_FDT 221539 2011-05-06 19:10:27Z ru Set to not build Flattened Device Tree support as part of the base system. This includes the device tree compiler (dtc) and libfdt support library. .It Va WITHOUT_FILE .\" from FreeBSD: head/tools/build/options/WITHOUT_FILE 278193 2015-02-04 10:24:40Z ngie Set to not build .Xr file 1 and related programs. .It Va WITHOUT_FINGER .\" from FreeBSD: head/tools/build/options/WITHOUT_FINGER 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr finger 1 and .Xr fingerd 8 . .It Va WITHOUT_FLOPPY .\" from FreeBSD: head/tools/build/options/WITHOUT_FLOPPY 221540 2011-05-06 19:13:03Z ru Set to not build or install programs for operating floppy disk driver. .It Va WITHOUT_FMTREE .\" from FreeBSD: head/tools/build/options/WITHOUT_FMTREE 261299 2014-01-30 21:37:43Z brooks Set to not build and install .Pa /usr/sbin/fmtree . .It Va WITHOUT_FORMAT_EXTENSIONS .\" from FreeBSD: head/tools/build/options/WITHOUT_FORMAT_EXTENSIONS 250658 2013-05-15 13:04:10Z brooks Set to not enable .Fl fformat-extensions when compiling the kernel. Also disables all format checking. .It Va WITHOUT_FORTH .\" from FreeBSD: head/tools/build/options/WITHOUT_FORTH 156932 2006-03-21 07:50:50Z ru Set to build bootloaders without Forth support. .It Va WITHOUT_FP_LIBC .\" from FreeBSD: head/tools/build/options/WITHOUT_FP_LIBC 156932 2006-03-21 07:50:50Z ru Set to build .Nm libc without floating-point support. .It Va WITHOUT_FREEBSD_UPDATE .\" from FreeBSD: head/tools/build/options/WITHOUT_FREEBSD_UPDATE 183242 2008-09-21 22:02:26Z sam Set to not build .Xr freebsd-update 8 . .It Va WITHOUT_FTP .\" from FreeBSD: head/tools/build/options/WITHOUT_FTP 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr ftp 1 and .Xr ftpd 8 . .It Va WITHOUT_GAMES .\" from FreeBSD: head/tools/build/options/WITHOUT_GAMES 156932 2006-03-21 07:50:50Z ru Set to not build games. .It Va WITHOUT_GCC .\" from FreeBSD: head/tools/build/options/WITHOUT_GCC 264660 2014-04-18 17:03:58Z imp Set to not build and install gcc and g++ as part of the normal build process. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386 and pc98/i386. .It Va WITH_GCC .\" from FreeBSD: head/tools/build/options/WITH_GCC 255326 2013-09-06 20:49:48Z zeising Set to build and install gcc and g++. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_GCC_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITHOUT_GCC_BOOTSTRAP 273177 2014-10-16 18:28:11Z skreuzer Set to not build gcc and g++ as part of the bootstrap process. You must enable either gcc or clang bootstrap to be able to build the system, unless an alternative compiler is provided via XCC. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386 and pc98/i386. .It Va WITH_GCC_BOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITH_GCC_BOOTSTRAP 264660 2014-04-18 17:03:58Z imp Set to build gcc and g++ as part of the bootstrap process. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_GCOV .\" from FreeBSD: head/tools/build/options/WITHOUT_GCOV 156932 2006-03-21 07:50:50Z ru Set to not build the .Xr gcov 1 tool. .It Va WITHOUT_GDB .\" from FreeBSD: head/tools/build/options/WITHOUT_GDB 156932 2006-03-21 07:50:50Z ru Set to not build .Xr gdb 1 . .Pp It is a default setting on arm64/aarch64. .It Va WITH_GDB .\" from FreeBSD: head/tools/build/options/WITH_GDB 295493 2016-02-11 00:30:51Z emaste Set to build .Xr gdb 1 . .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_GNU .\" from FreeBSD: head/tools/build/options/WITHOUT_GNU 174550 2007-12-12 16:43:17Z ru Set to not build contributed GNU software as a part of the base system. This option can be useful if the system built must not contain any code covered by the GNU Public License due to legal reasons. .Bf -symbolic The option has no effect yet. .Ef When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_GNU_SUPPORT .El .It Va WITHOUT_GNUCXX .\" from FreeBSD: head/tools/build/options/WITHOUT_GNUCXX 255321 2013-09-06 20:08:03Z theraven Do not build the GNU C++ stack (g++, libstdc++). This is the default on platforms where clang is the system compiler. .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386 and pc98/i386. .It Va WITH_GNUCXX .\" from FreeBSD: head/tools/build/options/WITH_GNUCXX 255321 2013-09-06 20:08:03Z theraven Build the GNU C++ stack (g++, libstdc++). This is the default on platforms where gcc is the system compiler. .Pp It is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_GNU_GREP_COMPAT .\" from FreeBSD: head/tools/build/options/WITHOUT_GNU_GREP_COMPAT 273421 2014-10-21 20:44:33Z emaste Set this option to omit the gnu extensions to grep from being included in BSD grep. .It Va WITHOUT_GNU_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_GNU_SUPPORT 156932 2006-03-21 07:50:50Z ru Set to build some programs without optional GNU support. .It Va WITHOUT_GPIO .\" from FreeBSD: head/tools/build/options/WITHOUT_GPIO 228081 2011-11-28 17:54:34Z dim Set to not build .Xr gpioctl 8 as part of the base system. .It Va WITHOUT_GPL_DTC .\" from FreeBSD: head/tools/build/options/WITHOUT_GPL_DTC 264515 2014-04-15 20:41:55Z imp Set to build the BSD licensed version of the device tree compiler, instead of the GPL'd one from elinux.org. .It Va WITHOUT_GROFF .\" from FreeBSD: head/tools/build/options/WITHOUT_GROFF 218941 2011-02-22 08:13:49Z uqs Set to not build .Xr groff 1 and .Xr vgrind 1 . You should consider installing the textproc/groff port to not break .Xr man 1 . .It Va WITHOUT_GSSAPI .\" from FreeBSD: head/tools/build/options/WITHOUT_GSSAPI 174548 2007-12-12 16:39:32Z ru Set to not build libgssapi. .It Va WITHOUT_HAST .\" from FreeBSD: head/tools/build/options/WITHOUT_HAST 277725 2015-01-26 06:27:07Z ngie Set to not build .Xr hastd 8 and related utilities. .It Va WITH_HESIOD .\" from FreeBSD: head/tools/build/options/WITH_HESIOD 156932 2006-03-21 07:50:50Z ru Set to build Hesiod support. .It Va WITHOUT_HTML .\" from FreeBSD: head/tools/build/options/WITHOUT_HTML 156932 2006-03-21 07:50:50Z ru Set to not build HTML docs. .It Va WITHOUT_HYPERV .\" from FreeBSD: head/tools/build/options/WITHOUT_HYPERV 271493 2014-09-13 02:15:31Z delphij Set to not build or install HyperV utilities. .It Va WITHOUT_ICONV .\" from FreeBSD: head/tools/build/options/WITHOUT_ICONV 254919 2013-08-26 17:15:56Z antoine Set to not build iconv as part of libc. .It Va WITHOUT_INCLUDES .\" from FreeBSD: head/tools/build/options/WITHOUT_INCLUDES 275138 2014-11-26 20:43:09Z gjb Set to not install header files. This option used to be spelled .Va NO_INCS . .Bf -symbolic The option does not work for build targets. .Ef .It Va WITHOUT_INET .\" from FreeBSD: head/tools/build/options/WITHOUT_INET 221266 2011-04-30 17:58:28Z bz Set to not build programs and libraries related to IPv4 networking. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_INET_SUPPORT .El .It Va WITHOUT_INET6 .\" from FreeBSD: head/tools/build/options/WITHOUT_INET6 156932 2006-03-21 07:50:50Z ru Set to not build programs and libraries related to IPv6 networking. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_INET6_SUPPORT .El .It Va WITHOUT_INET6_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_INET6_SUPPORT 156932 2006-03-21 07:50:50Z ru Set to build libraries, programs, and kernel modules without IPv6 support. .It Va WITHOUT_INETD .\" from FreeBSD: head/tools/build/options/WITHOUT_INETD 278192 2015-02-04 10:19:32Z ngie Set to not build .Xr inetd 8 . .It Va WITHOUT_INET_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_INET_SUPPORT 221266 2011-04-30 17:58:28Z bz Set to build libraries, programs, and kernel modules without IPv4 support. .It Va WITHOUT_INSTALLLIB .\" from FreeBSD: head/tools/build/options/WITHOUT_INSTALLLIB 297941 2016-04-13 21:01:58Z bdrewery Set this if you do not want to install optional libraries. For example when creating a .Xr nanobsd 8 image. .Bf -symbolic The option does not work for build targets. .Ef .It Va WITH_INSTALL_AS_USER .\" from FreeBSD: head/tools/build/options/WITH_INSTALL_AS_USER 238021 2012-07-02 20:24:01Z marcel Set to make install targets succeed for non-root users by installing files with owner and group attributes set to that of the user running the .Xr make 1 command. The user still has to set the .Va DESTDIR variable to point to a directory where the user has write permissions. .It Va WITHOUT_IPFILTER .\" from FreeBSD: head/tools/build/options/WITHOUT_IPFILTER 156932 2006-03-21 07:50:50Z ru Set to not build IP Filter package. .It Va WITHOUT_IPFW .\" from FreeBSD: head/tools/build/options/WITHOUT_IPFW 183242 2008-09-21 22:02:26Z sam Set to not build IPFW tools. .It Va WITHOUT_ISCSI .\" from FreeBSD: head/tools/build/options/WITHOUT_ISCSI 277675 2015-01-25 04:20:11Z ngie Set to not build .Xr iscid 8 and related utilities. .It Va WITHOUT_JAIL .\" from FreeBSD: head/tools/build/options/WITHOUT_JAIL 249966 2013-04-27 04:09:09Z eadler Set to not build tools for the support of jails; e.g., .Xr jail 8 . .It Va WITHOUT_KDUMP .\" from FreeBSD: head/tools/build/options/WITHOUT_KDUMP 240690 2012-09-19 11:38:37Z zeising Set to not build .Xr kdump 1 and .Xr truss 1 . .It Va WITHOUT_KERBEROS .\" from FreeBSD: head/tools/build/options/WITHOUT_KERBEROS 174549 2007-12-12 16:42:03Z ru Set this if you do not want to build Kerberos 5 (KTH Heimdal). When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS_SUPPORT .El .Pp When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITHOUT_KERBEROS_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_KERBEROS_SUPPORT 251794 2013-06-15 20:29:07Z eadler Set to build some programs without Kerberos support, like .Xr ssh 1 , .Xr telnet 1 , .Xr sshd 8 , and .Xr telnetd 8 . .It Va WITHOUT_KERNEL_SYMBOLS .\" from FreeBSD: head/tools/build/options/WITHOUT_KERNEL_SYMBOLS 222189 2011-05-22 18:23:17Z imp Set to not install kernel symbol files. .Bf -symbolic This option is recommended for those people who have small root partitions. .Ef .It Va WITHOUT_KVM .\" from FreeBSD: head/tools/build/options/WITHOUT_KVM 174550 2007-12-12 16:43:17Z ru Set to not build the .Nm libkvm library as a part of the base system. .Bf -symbolic The option has no effect yet. .Ef When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_KVM_SUPPORT .El .It Va WITHOUT_KVM_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_KVM_SUPPORT 170644 2007-06-13 02:08:04Z sepotvin Set to build some programs without optional .Nm libkvm support. .It Va WITHOUT_LDNS .\" from FreeBSD: head/tools/build/options/WITHOUT_LDNS 255591 2013-09-15 13:11:13Z des Setting this variable will prevent the LDNS library from being built. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_LDNS_UTILS .It .Va WITHOUT_UNBOUND .El .It Va WITHOUT_LDNS_UTILS .\" from FreeBSD: head/tools/build/options/WITHOUT_LDNS_UTILS 255850 2013-09-24 14:33:31Z des Setting this variable will prevent building the LDNS utilities .Xr drill 1 and .Xr host 1 . .It Va WITHOUT_LEGACY_CONSOLE .\" from FreeBSD: head/tools/build/options/WITHOUT_LEGACY_CONSOLE 296264 2016-03-01 11:36:10Z trasz Set to not build programs that support a legacy PC console; e.g., .Xr kbdcontrol 1 and .Xr vidcontrol 1 . .It Va WITHOUT_LIB32 .\" from FreeBSD: head/tools/build/options/WITHOUT_LIB32 274664 2014-11-18 17:06:48Z imp On 64-bit platforms, set to not build 32-bit library set and a .Nm ld-elf32.so.1 runtime linker. .It Va WITHOUT_LIBCPLUSPLUS .\" from FreeBSD: head/tools/build/options/WITHOUT_LIBCPLUSPLUS 246262 2013-02-02 22:42:46Z dim Set to avoid building libcxxrt and libc++. .It Va WITHOUT_LIBPTHREAD .\" from FreeBSD: head/tools/build/options/WITHOUT_LIBPTHREAD 188848 2009-02-20 11:09:55Z mtm Set to not build the .Nm libpthread providing library, .Nm libthr . When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_LIBTHR .El .It Va WITH_LIBSOFT .\" from FreeBSD: head/tools/build/options/WITH_LIBSOFT 300325 2016-05-20 19:23:07Z bdrewery On armv6 only, set to enable soft float ABI compatibility libraries. This option is for transitioning to the new hard float ABI. .It Va WITHOUT_LIBTHR .\" from FreeBSD: head/tools/build/options/WITHOUT_LIBTHR 156932 2006-03-21 07:50:50Z ru Set to not build the .Nm libthr (1:1 threading) library. .It Va WITHOUT_LLDB .\" from FreeBSD: head/tools/build/options/WITHOUT_LLDB 289275 2015-10-14 00:23:31Z emaste Set to not build the LLDB debugger. .Pp It is a default setting on arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_LLDB .\" from FreeBSD: head/tools/build/options/WITH_LLDB 255722 2013-09-20 01:52:02Z emaste Set to build the LLDB debugger. .Pp It is a default setting on amd64/amd64 and arm64/aarch64. .It Va WITHOUT_LLVM_LIBUNWIND .\" from FreeBSD: head/tools/build/options/WITHOUT_LLVM_LIBUNWIND 293450 2016-01-09 00:42:07Z emaste Set to use GCC's stack unwinder (instead of LLVM's libunwind). .Pp It is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, pc98/i386, powerpc/powerpc, powerpc/powerpc64 and sparc64/sparc64. .It Va WITH_LLVM_LIBUNWIND .\" from FreeBSD: head/tools/build/options/WITH_LLVM_LIBUNWIND 293450 2016-01-09 00:42:07Z emaste Set to use LLVM's libunwind stack unwinder (instead of GCC's unwinder). .Pp It is a default setting on arm64/aarch64. .It Va WITHOUT_LOCALES .\" from FreeBSD: head/tools/build/options/WITHOUT_LOCALES 156932 2006-03-21 07:50:50Z ru Set to not build localization files; see .Xr locale 1 . .It Va WITHOUT_LOCATE .\" from FreeBSD: head/tools/build/options/WITHOUT_LOCATE 183242 2008-09-21 22:02:26Z sam Set to not build .Xr locate 1 and related programs. .It Va WITHOUT_LPR .\" from FreeBSD: head/tools/build/options/WITHOUT_LPR 156932 2006-03-21 07:50:50Z ru Set to not build .Xr lpr 1 and related programs. .It Va WITHOUT_LS_COLORS .\" from FreeBSD: head/tools/build/options/WITHOUT_LS_COLORS 235660 2012-05-19 20:05:27Z marcel Set to build .Xr ls 1 without support for colors to distinguish file types. .It Va WITHOUT_LZMA_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_LZMA_SUPPORT 245171 2013-01-08 18:37:12Z obrien Set to build some programs without optional lzma compression support. .It Va WITHOUT_MAIL .\" from FreeBSD: head/tools/build/options/WITHOUT_MAIL 183242 2008-09-21 22:02:26Z sam Set to not build any mail support (MUA or MTA). When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_DMAGENT .It .Va WITHOUT_MAILWRAPPER .It .Va WITHOUT_SENDMAIL .El .It Va WITHOUT_MAILWRAPPER .\" from FreeBSD: head/tools/build/options/WITHOUT_MAILWRAPPER 156932 2006-03-21 07:50:50Z ru Set to not build the .Xr mailwrapper 8 MTA selector. .It Va WITHOUT_MAKE .\" from FreeBSD: head/tools/build/options/WITHOUT_MAKE 183242 2008-09-21 22:02:26Z sam Set to not install .Xr make 1 and related support files. .It Va WITHOUT_MAN .\" from FreeBSD: head/tools/build/options/WITHOUT_MAN 156932 2006-03-21 07:50:50Z ru Set to not build manual pages. When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_MAN_UTILS (unless .Va WITH_MAN_UTILS is set explicitly) .El .It Va WITHOUT_MANCOMPRESS .\" from FreeBSD: head/tools/build/options/WITHOUT_MANCOMPRESS 266752 2014-05-27 15:52:27Z gjb Set to not to install compressed man pages. Only the uncompressed versions will be installed. .It Va WITHOUT_MANDOCDB .\" from FreeBSD: head/tools/build/options/WITHOUT_MANDOCDB 283777 2015-05-30 17:41:37Z bapt Use the .Xr mandoc 1 version of .Xr makewhatis 8 database and utilities. .It Va WITHOUT_MAN_UTILS .\" from FreeBSD: head/tools/build/options/WITHOUT_MAN_UTILS 208322 2010-05-20 00:07:21Z jkim Set to not build utilities for manual pages, .Xr apropos 1 , .Xr catman 1 , .Xr makewhatis 1 , .Xr man 1 , .Xr whatis 1 , .Xr manctl 8 , and related support files. .It Va WITH_META_MODE .\" from FreeBSD: head/tools/build/options/WITH_META_MODE 301889 2016-06-14 16:20:25Z bdrewery Creates .Xr make 1 meta files when building, which can provide a reliable incremental build when using .Xr filemon 4 . The meta file is created in the OBJDIR as .Pa target.meta . These meta files track the command ran, its output, and the current directory. The .Xr filemon 4 module is required unless .Va NO_FILEMON is defined. When the module is loaded, any files used by the commands executed will be tracked as dependencies for the target in its meta file. The target will be considered out-of-date and rebuilt if any of the following are true compared to the last build: .Bl -bullet -compact .It The command to execute changes. .It The current working directory changes. .It The target's meta file is missing. .It The target's meta file is missing filemon data when filemon is loaded and a previous run did not have it loaded. .It [requires .Xr filemon 4 ] Files read, executed or linked to are newer than the target. .It [requires .Xr filemon 4 ] Files read, written, executed or linked are missing. .El The meta files can also be useful for debugging. .Pp The build will hide commands ran unless .Va NO_SILENT is defined. Errors will cause .Xr make 1 to show some of its environment for further debugging. .Pp The build operates as it normally would otherwise. This option originally invoked a different build system but that was renamed to .Va WITH_DIRDEPS_BUILD . .Pp Currently this also enforces .Va WITHOUT_SYSTEM_COMPILER . When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_SYSTEM_COMPILER (unless .Va WITH_SYSTEM_COMPILER is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_NAND .\" from FreeBSD: head/tools/build/options/WITH_NAND 235537 2012-05-17 10:11:18Z gber Set to build the NAND Flash components. .It Va WITHOUT_NDIS .\" from FreeBSD: head/tools/build/options/WITHOUT_NDIS 183242 2008-09-21 22:02:26Z sam Set to not build programs and libraries related to NDIS emulation support. .It Va WITHOUT_NETCAT .\" from FreeBSD: head/tools/build/options/WITHOUT_NETCAT 156932 2006-03-21 07:50:50Z ru Set to not build .Xr nc 1 utility. .It Va WITHOUT_NETGRAPH .\" from FreeBSD: head/tools/build/options/WITHOUT_NETGRAPH 183242 2008-09-21 22:02:26Z sam Set to not build applications to support .Xr netgraph 4 . When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_ATM .It .Va WITHOUT_BLUETOOTH .It .Va WITHOUT_NETGRAPH_SUPPORT .El .It Va WITHOUT_NETGRAPH_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_NETGRAPH_SUPPORT 183305 2008-09-23 16:11:15Z sam Set to build libraries, programs, and kernel modules without netgraph support. .It Va WITHOUT_NIS .\" from FreeBSD: head/tools/build/options/WITHOUT_NIS 156932 2006-03-21 07:50:50Z ru Set to not build .Xr NIS 8 support and related programs. If set, you might need to adopt your .Xr nsswitch.conf 5 and remove .Sq nis entries. .It Va WITHOUT_NLS .\" from FreeBSD: head/tools/build/options/WITHOUT_NLS 156932 2006-03-21 07:50:50Z ru Set to not build NLS catalogs. .It Va WITHOUT_NLS_CATALOGS .\" from FreeBSD: head/tools/build/options/WITHOUT_NLS_CATALOGS 156932 2006-03-21 07:50:50Z ru Set to not build NLS catalog support for .Xr csh 1 . .It Va WITHOUT_NS_CACHING .\" from FreeBSD: head/tools/build/options/WITHOUT_NS_CACHING 172803 2007-10-19 14:01:25Z ru Set to disable name caching in the .Pa nsswitch subsystem. The generic caching daemon, .Xr nscd 8 , will not be built either if this option is set. .It Va WITHOUT_NTP .\" from FreeBSD: head/tools/build/options/WITHOUT_NTP 183242 2008-09-21 22:02:26Z sam Set to not build .Xr ntpd 8 and related programs. .It Va WITH_OFED .\" from FreeBSD: head/tools/build/options/WITH_OFED 228081 2011-11-28 17:54:34Z dim Set to build the .Dq "OpenFabrics Enterprise Distribution" Infiniband software stack. .It Va WITH_OPENLDAP .\" from FreeBSD: head/tools/build/options/WITH_OPENLDAP 264902 2014-04-24 23:17:31Z imp Enable building openldap support for kerberos. .It Va WITHOUT_OPENSSH .\" from FreeBSD: head/tools/build/options/WITHOUT_OPENSSH 156932 2006-03-21 07:50:50Z ru Set to not build OpenSSH. .It Va WITHOUT_OPENSSL .\" from FreeBSD: head/tools/build/options/WITHOUT_OPENSSL 156932 2006-03-21 07:50:50Z ru Set to not build OpenSSL. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS .It .Va WITHOUT_KERBEROS_SUPPORT .It .Va WITHOUT_OPENSSH .El .Pp When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITHOUT_PAM .\" from FreeBSD: head/tools/build/options/WITHOUT_PAM 174550 2007-12-12 16:43:17Z ru Set to not build PAM library and modules. .Bf -symbolic This option is deprecated and does nothing. .Ef When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_PAM_SUPPORT .El .It Va WITHOUT_PAM_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_PAM_SUPPORT 156932 2006-03-21 07:50:50Z ru Set to build some programs without PAM support, particularly .Xr ftpd 8 and .Xr ppp 8 . .It Va WITHOUT_PC_SYSINSTALL .\" from FreeBSD: head/tools/build/options/WITHOUT_PC_SYSINSTALL 245606 2013-01-18 15:57:09Z eadler Set to not build .Xr pc-sysinstall 8 and related programs. .It Va WITHOUT_PF .\" from FreeBSD: head/tools/build/options/WITHOUT_PF 156932 2006-03-21 07:50:50Z ru Set to not build PF firewall package. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_AUTHPF .El .It Va WITHOUT_PKGBOOTSTRAP .\" from FreeBSD: head/tools/build/options/WITHOUT_PKGBOOTSTRAP 258924 2013-12-04 15:58:42Z bdrewery Set to not build .Xr pkg 7 bootstrap tool. .It Va WITHOUT_PMC .\" from FreeBSD: head/tools/build/options/WITHOUT_PMC 183242 2008-09-21 22:02:26Z sam Set to not build .Xr pmccontrol 8 and related programs. .It Va WITHOUT_PORTSNAP .\" from FreeBSD: head/tools/build/options/WITHOUT_PORTSNAP 183242 2008-09-21 22:02:26Z sam Set to not build or install .Xr portsnap 8 and related files. .It Va WITHOUT_PPP .\" from FreeBSD: head/tools/build/options/WITHOUT_PPP 183242 2008-09-21 22:02:26Z sam Set to not build .Xr ppp 8 and related programs. .It Va WITHOUT_PROFILE .\" from FreeBSD: head/tools/build/options/WITHOUT_PROFILE 228196 2011-12-02 09:09:54Z fjoe Set to avoid compiling profiled libraries. .It Va WITHOUT_QUOTAS .\" from FreeBSD: head/tools/build/options/WITHOUT_QUOTAS 296264 2016-03-01 11:36:10Z trasz Set to not build .Xr quota 1 and related programs. .It Va WITHOUT_RADIUS_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_RADIUS_SUPPORT 278182 2015-02-04 06:53:45Z ngie Set to not build radius support into various applications, like .Xr pam_radius 8 and .Xr ppp 8 . .It Va WITHOUT_RBOOTD .\" from FreeBSD: head/tools/build/options/WITHOUT_RBOOTD 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr rbootd 8 . .It Va WITHOUT_RCMDS .\" from FreeBSD: head/tools/build/options/WITHOUT_RCMDS 156932 2006-03-21 07:50:50Z ru Disable building of the .Bx r-commands. This includes .Xr rlogin 1 , .Xr rsh 1 , etc. .It Va WITHOUT_RCS .\" from FreeBSD: head/tools/build/options/WITHOUT_RCS 275138 2014-11-26 20:43:09Z gjb Set to not build .Xr rcs 1 , .Xr etcupdate 8 , and related utilities. .It Va WITHOUT_RESCUE .\" from FreeBSD: head/tools/build/options/WITHOUT_RESCUE 156932 2006-03-21 07:50:50Z ru Set to not build .Xr rescue 8 . .It Va WITHOUT_ROUTED .\" from FreeBSD: head/tools/build/options/WITHOUT_ROUTED 183242 2008-09-21 22:02:26Z sam Set to not build .Xr routed 8 utility. .It Va WITHOUT_SENDMAIL .\" from FreeBSD: head/tools/build/options/WITHOUT_SENDMAIL 156932 2006-03-21 07:50:50Z ru Set to not build .Xr sendmail 8 and related programs. .It Va WITHOUT_SETUID_LOGIN .\" from FreeBSD: head/tools/build/options/WITHOUT_SETUID_LOGIN 156932 2006-03-21 07:50:50Z ru Set this to disable the installation of .Xr login 1 as a set-user-ID root program. .It Va WITHOUT_SHAREDOCS .\" from FreeBSD: head/tools/build/options/WITHOUT_SHAREDOCS 156932 2006-03-21 07:50:50Z ru Set to not build the .Bx 4.4 legacy docs. .It Va WITH_SHARED_TOOLCHAIN .\" from FreeBSD: head/tools/build/options/WITH_SHARED_TOOLCHAIN 235342 2012-05-12 16:12:36Z gjb Set to build the toolchain binaries shared. The set includes .Xr cc 1 , .Xr make 1 and necessary utilities like assembler, linker and library archive manager. .It Va WITH_SORT_THREADS .\" from FreeBSD: head/tools/build/options/WITH_SORT_THREADS 264158 2014-04-05 18:00:45Z imp Set to enable threads in .Xr sort 1 . .It Va WITHOUT_SOURCELESS .\" from FreeBSD: head/tools/build/options/WITHOUT_SOURCELESS 230972 2012-02-04 00:54:43Z rmh Set to not build kernel modules that include sourceless code (either microcode or native code for host CPU). When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_SOURCELESS_HOST .It .Va WITHOUT_SOURCELESS_UCODE .El .It Va WITHOUT_SOURCELESS_HOST .\" from FreeBSD: head/tools/build/options/WITHOUT_SOURCELESS_HOST 230972 2012-02-04 00:54:43Z rmh Set to not build kernel modules that include sourceless native code for host CPU. .It Va WITHOUT_SOURCELESS_UCODE .\" from FreeBSD: head/tools/build/options/WITHOUT_SOURCELESS_UCODE 230972 2012-02-04 00:54:43Z rmh Set to not build kernel modules that include sourceless microcode. .It Va WITHOUT_SSP .\" from FreeBSD: head/tools/build/options/WITHOUT_SSP 180012 2008-06-25 21:33:28Z ru Set to not build world with propolice stack smashing protection. .It Va WITH_STAGING .\" from FreeBSD: head/tools/build/options/WITH_STAGING 290816 2015-11-14 03:24:48Z sjg Enable staging of files to a stage tree. This can be best thought of as auto-install to .Va DESTDIR with some extra meta data to ensure dependencies can be tracked. Depends on .Va WITH_DIRDEPS_BUILD . When set, the following options are also in effect: .Pp .Bl -inset -compact .It Va WITH_STAGING_MAN (unless .Va WITHOUT_STAGING_MAN is set explicitly) .It Va WITH_STAGING_PROG (unless .Va WITHOUT_STAGING_PROG is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_STAGING_MAN .\" from FreeBSD: head/tools/build/options/WITH_STAGING_MAN 284708 2015-06-22 20:21:57Z sjg Enable staging of MAN pages to stage tree. .It Va WITH_STAGING_PROG .\" from FreeBSD: head/tools/build/options/WITH_STAGING_PROG 284708 2015-06-22 20:21:57Z sjg Enable staging of PROGs to stage tree. .It Va WITH_STALE_STAGED .\" from FreeBSD: head/tools/build/options/WITH_STALE_STAGED 284708 2015-06-22 20:21:57Z sjg Check staged files are not stale. .It Va WITH_SVN .\" from FreeBSD: head/tools/build/options/WITH_SVN 252561 2013-07-03 12:36:47Z zeising Set to install .Xr svnlite 1 as .Xr svn 1 . .It Va WITHOUT_SVNLITE .\" from FreeBSD: head/tools/build/options/WITHOUT_SVNLITE 252561 2013-07-03 12:36:47Z zeising Set to not build .Xr svnlite 1 and related programs. .It Va WITHOUT_SYMVER .\" from FreeBSD: head/tools/build/options/WITHOUT_SYMVER 169649 2007-05-17 05:03:24Z deischen Set to disable symbol versioning when building shared libraries. .It Va WITHOUT_SYSCONS .\" from FreeBSD: head/tools/build/options/WITHOUT_SYSCONS 156932 2006-03-21 07:50:50Z ru Set to not build .Xr syscons 4 support files such as keyboard maps, fonts, and screen output maps. .It Va WITH_SYSROOT .\" from FreeBSD: head/tools/build/options/WITH_SYSROOT 290816 2015-11-14 03:24:48Z sjg Enable use of sysroot during build. Depends on .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_SYSTEM_COMPILER .\" from FreeBSD: head/tools/build/options/WITHOUT_SYSTEM_COMPILER 300354 2016-05-21 01:32:23Z bdrewery Set to not opportunistically skip building a cross-compiler during the bootstrap phase of the build. Normally, if the currently installed compiler matches the planned bootstrap compiler type and revision, then it will not be built. This does not prevent a compiler from being built for installation though, only for building one for the build itself. The .Va WITHOUT_CLANG and .Va WITHOUT_GCC options control those. .It Va WITHOUT_TALK .\" from FreeBSD: head/tools/build/options/WITHOUT_TALK 277676 2015-01-25 04:37:44Z ngie Set to not build or install .Xr talk 1 and .Xr talkd 8 . .It Va WITHOUT_TCP_WRAPPERS .\" from FreeBSD: head/tools/build/options/WITHOUT_TCP_WRAPPERS 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr tcpd 8 , and related utilities. .It Va WITHOUT_TCSH .\" from FreeBSD: head/tools/build/options/WITHOUT_TCSH 156932 2006-03-21 07:50:50Z ru Set to not build and install .Pa /bin/csh (which is .Xr tcsh 1 ) . .It Va WITHOUT_TELNET .\" from FreeBSD: head/tools/build/options/WITHOUT_TELNET 296264 2016-03-01 11:36:10Z trasz Set to not build .Xr telnet 1 and related programs. .It Va WITHOUT_TESTS .\" from FreeBSD: head/tools/build/options/WITHOUT_TESTS 268778 2014-07-16 21:40:11Z jmmv Set to not build nor install the .Fx Test Suite in .Pa /usr/tests/ . See .Xr tests 7 for more details. This also disables the build of all test-related dependencies, including ATF. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_DTRACE_TESTS .It .Va WITHOUT_TESTS_SUPPORT .El .It Va WITHOUT_TESTS_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_TESTS_SUPPORT 274665 2014-11-18 17:06:50Z imp Set to disables the build of all test-related dependencies, including ATF. .It Va WITHOUT_TEXTPROC .\" from FreeBSD: head/tools/build/options/WITHOUT_TEXTPROC 183242 2008-09-21 22:02:26Z sam Set to not build programs used for text processing. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_GROFF .El .It Va WITHOUT_TFTP .\" from FreeBSD: head/tools/build/options/WITHOUT_TFTP 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr tftp 1 and .Xr tftpd 8 . .It Va WITHOUT_TIMED .\" from FreeBSD: head/tools/build/options/WITHOUT_TIMED 278192 2015-02-04 10:19:32Z ngie Set to not build or install .Xr timed 8 . .It Va WITHOUT_TOOLCHAIN .\" from FreeBSD: head/tools/build/options/WITHOUT_TOOLCHAIN 297939 2016-04-13 20:55:05Z bdrewery Set to not install header or programs used for program development, compilers, debuggers etc. When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_BINUTILS .It .Va WITHOUT_CLANG .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_GCC .It .Va WITHOUT_GDB .It .Va WITHOUT_INCLUDES .It .Va WITHOUT_LLDB .El .It Va WITHOUT_UNBOUND .\" from FreeBSD: head/tools/build/options/WITHOUT_UNBOUND 255597 2013-09-15 14:51:23Z des Set to not build .Xr unbound 8 and related programs. .It Va WITHOUT_USB .\" from FreeBSD: head/tools/build/options/WITHOUT_USB 156932 2006-03-21 07:50:50Z ru Set to not build USB-related programs and libraries. .It Va WITHOUT_USB_GADGET_EXAMPLES .\" from FreeBSD: head/tools/build/options/WITHOUT_USB_GADGET_EXAMPLES 274665 2014-11-18 17:06:50Z imp Set to build USB gadget kernel modules. .It Va WITHOUT_UTMPX .\" from FreeBSD: head/tools/build/options/WITHOUT_UTMPX 231530 2012-02-11 20:28:42Z ed Set to not build user accounting tools such as .Xr last 1 , .Xr users 1 , .Xr who 1 , .Xr ac 8 , .Xr lastlogin 8 and .Xr utx 8 . .It Va WITHOUT_VI .\" from FreeBSD: head/tools/build/options/WITHOUT_VI 264903 2014-04-24 23:17:40Z imp Set to not build and install vi, view, ex and related programs. .It Va WITHOUT_VT .\" from FreeBSD: head/tools/build/options/WITHOUT_VT 268022 2014-06-30 00:20:12Z emaste Set to not build .Xr vt 4 support files (fonts and keymaps). .It Va WITHOUT_WARNS .\" from FreeBSD: head/tools/build/options/WITHOUT_WARNS 276559 2015-01-02 18:57:58Z bapt Set this to not add warning flags to the compiler invocations. Useful as a temporary workaround when code enters the tree which triggers warnings in environments that differ from the original developer. .It Va WITHOUT_WIRELESS .\" from FreeBSD: head/tools/build/options/WITHOUT_WIRELESS 183242 2008-09-21 22:02:26Z sam Set to not build programs used for 802.11 wireless networks; especially .Xr wpa_supplicant 8 and .Xr hostapd 8 . When set, it also enforces the following options: .Pp .Bl -item -compact .It .Va WITHOUT_WIRELESS_SUPPORT .El .It Va WITHOUT_WIRELESS_SUPPORT .\" from FreeBSD: head/tools/build/options/WITHOUT_WIRELESS_SUPPORT 183305 2008-09-23 16:11:15Z sam Set to build libraries, programs, and kernel modules without 802.11 wireless support. .It Va WITHOUT_WPA_SUPPLICANT_EAPOL .\" from FreeBSD: head/tools/build/options/WITHOUT_WPA_SUPPLICANT_EAPOL 156932 2006-03-21 07:50:50Z ru Build .Xr wpa_supplicant 8 without support for the IEEE 802.1X protocol and without support for EAP-PEAP, EAP-TLS, EAP-LEAP, and EAP-TTLS protocols (usable only via 802.1X). .It Va WITHOUT_ZFS .\" from FreeBSD: head/tools/build/options/WITHOUT_ZFS 168409 2007-04-06 02:13:30Z pjd Set to not build ZFS file system. .It Va WITHOUT_ZONEINFO .\" from FreeBSD: head/tools/build/options/WITHOUT_ZONEINFO 235342 2012-05-12 16:12:36Z gjb Set to not build the timezone database. .El .Sh FILES .Bl -tag -compact -width Pa .It Pa /etc/src.conf .It Pa /etc/src-env.conf .It Pa /usr/share/mk/bsd.own.mk .El .Sh SEE ALSO .Xr make 1 , .Xr make.conf 5 , .Xr build 7 , .Xr ports 7 .Sh HISTORY The .Nm file appeared in .Fx 7.0 . .Sh AUTHORS This manual page was autogenerated. Index: projects/vnet/share/man/man9/Makefile =================================================================== --- projects/vnet/share/man/man9/Makefile (revision 302276) +++ projects/vnet/share/man/man9/Makefile (revision 302277) @@ -1,1937 +1,1940 @@ # $FreeBSD$ .include PACKAGE=runtime-manuals MAN= accept_filter.9 \ accf_data.9 \ accf_dns.9 \ accf_http.9 \ acl.9 \ alq.9 \ altq.9 \ atomic.9 \ bios.9 \ bitset.9 \ boot.9 \ bpf.9 \ buf.9 \ buf_ring.9 \ BUF_ISLOCKED.9 \ BUF_LOCK.9 \ BUF_LOCKFREE.9 \ BUF_LOCKINIT.9 \ BUF_RECURSED.9 \ BUF_TIMELOCK.9 \ BUF_UNLOCK.9 \ bus_activate_resource.9 \ BUS_ADD_CHILD.9 \ bus_adjust_resource.9 \ bus_alloc_resource.9 \ BUS_BIND_INTR.9 \ bus_child_present.9 \ BUS_CHILD_DELETED.9 \ BUS_CHILD_DETACHED.9 \ BUS_CONFIG_INTR.9 \ BUS_DESCRIBE_INTR.9 \ bus_dma.9 \ bus_generic_attach.9 \ bus_generic_detach.9 \ bus_generic_new_pass.9 \ bus_generic_print_child.9 \ bus_generic_read_ivar.9 \ bus_generic_shutdown.9 \ BUS_GET_CPUS.9 \ bus_get_resource.9 \ bus_map_resource.9 \ BUS_NEW_PASS.9 \ BUS_PRINT_CHILD.9 \ BUS_READ_IVAR.9 \ BUS_RESCAN.9 \ bus_release_resource.9 \ bus_set_pass.9 \ bus_set_resource.9 \ BUS_SETUP_INTR.9 \ bus_space.9 \ byteorder.9 \ casuword.9 \ cd.9 \ condvar.9 \ config_intrhook.9 \ contigmalloc.9 \ copy.9 \ counter.9 \ cpuset.9 \ cr_cansee.9 \ critical_enter.9 \ cr_seeothergids.9 \ cr_seeotheruids.9 \ crypto.9 \ CTASSERT.9 \ DB_COMMAND.9 \ DECLARE_GEOM_CLASS.9 \ DECLARE_MODULE.9 \ DELAY.9 \ devclass.9 \ devclass_find.9 \ devclass_get_device.9 \ devclass_get_devices.9 \ devclass_get_drivers.9 \ devclass_get_maxunit.9 \ devclass_get_name.9 \ devclass_get_softc.9 \ dev_clone.9 \ devfs_set_cdevpriv.9 \ device.9 \ device_add_child.9 \ DEVICE_ATTACH.9 \ device_delete_child.9 \ DEVICE_DETACH.9 \ device_enable.9 \ device_find_child.9 \ device_get_children.9 \ device_get_devclass.9 \ device_get_driver.9 \ device_get_ivars.9 \ device_get_name.9 \ device_get_parent.9 \ device_get_softc.9 \ device_get_state.9 \ device_get_sysctl.9 \ device_get_unit.9 \ DEVICE_IDENTIFY.9 \ device_printf.9 \ DEVICE_PROBE.9 \ device_probe_and_attach.9 \ device_quiet.9 \ device_set_desc.9 \ device_set_driver.9 \ device_set_flags.9 \ DEVICE_SHUTDOWN.9 \ DEV_MODULE.9 \ devstat.9 \ devtoname.9 \ disk.9 \ domain.9 \ drbr.9 \ driver.9 \ DRIVER_MODULE.9 \ EVENTHANDLER.9 \ eventtimers.9 \ extattr.9 \ fail.9 \ fetch.9 \ firmware.9 \ fpu_kern.9 \ g_access.9 \ g_attach.9 \ g_bio.9 \ g_consumer.9 \ g_data.9 \ get_cyclecount.9 \ getenv.9 \ getnewvnode.9 \ g_event.9 \ g_geom.9 \ g_provider.9 \ g_provider_by_name.9 \ groupmember.9 \ g_wither_geom.9 \ hash.9 \ hashinit.9 \ hexdump.9 \ hhook.9 \ ieee80211.9 \ ieee80211_amrr.9 \ ieee80211_beacon.9 \ ieee80211_bmiss.9 \ ieee80211_crypto.9 \ ieee80211_ddb.9 \ ieee80211_input.9 \ ieee80211_node.9 \ ieee80211_output.9 \ ieee80211_proto.9 \ ieee80211_radiotap.9 \ ieee80211_regdomain.9 \ ieee80211_scan.9 \ ieee80211_vap.9 \ ifnet.9 \ inittodr.9 \ insmntque.9 \ intro.9 \ ithread.9 \ KASSERT.9 \ kern_testfrwk.9 \ kernacc.9 \ kernel_mount.9 \ khelp.9 \ kobj.9 \ kproc.9 \ kqueue.9 \ kthread.9 \ ktr.9 \ lock.9 \ locking.9 \ LOCK_PROFILING.9 \ mac.9 \ make_dev.9 \ malloc.9 \ mbchain.9 \ mbpool.9 \ mbuf.9 \ mbuf_tags.9 \ MD5.9 \ mdchain.9 \ memcchr.9 \ memguard.9 \ microseq.9 \ microtime.9 \ microuptime.9 \ mi_switch.9 \ mod_cc.9 \ module.9 \ MODULE_DEPEND.9 \ MODULE_VERSION.9 \ mtx_pool.9 \ mutex.9 \ namei.9 \ netisr.9 \ nv.9 \ osd.9 \ owll.9 \ own.9 \ panic.9 \ pbuf.9 \ PCBGROUP.9 \ p_candebug.9 \ p_cansee.9 \ pci.9 \ PCI_IOV_ADD_VF.9 \ PCI_IOV_INIT.9 \ pci_iov_schema.9 \ PCI_IOV_UNINIT.9 \ pfil.9 \ pfind.9 \ pget.9 \ pgfind.9 \ PHOLD.9 \ physio.9 \ pmap.9 \ pmap_activate.9 \ pmap_clear_modify.9 \ pmap_copy.9 \ pmap_enter.9 \ pmap_extract.9 \ pmap_growkernel.9 \ pmap_init.9 \ pmap_is_modified.9 \ pmap_is_prefaultable.9 \ pmap_map.9 \ pmap_mincore.9 \ pmap_object_init_pt.9 \ pmap_page_exists_quick.9 \ pmap_page_init.9 \ pmap_pinit.9 \ pmap_protect.9 \ pmap_qenter.9 \ pmap_quick_enter_page.9 \ pmap_release.9 \ pmap_remove.9 \ pmap_resident_count.9 \ pmap_unwire.9 \ pmap_zero_page.9 \ printf.9 \ prison_check.9 \ priv.9 \ proc_rwmem.9 \ pseudofs.9 \ psignal.9 \ random.9 \ random_harvest.9 \ redzone.9 \ refcount.9 \ resettodr.9 \ resource_int_value.9 \ rijndael.9 \ rman.9 \ rmlock.9 \ rtalloc.9 \ rtentry.9 \ runqueue.9 \ rwlock.9 \ sbuf.9 \ scheduler.9 \ SDT.9 \ securelevel_gt.9 \ selrecord.9 \ sema.9 \ sf_buf.9 \ sglist.9 \ shm_map.9 \ signal.9 \ sleep.9 \ sleepqueue.9 \ socket.9 \ stack.9 \ store.9 \ style.9 \ swi.9 \ sx.9 \ SYSCALL_MODULE.9 \ sysctl.9 \ sysctl_add_oid.9 \ sysctl_ctx_init.9 \ SYSINIT.9 \ taskqueue.9 \ + tcp_functions.9 \ thread_exit.9 \ time.9 \ timeout.9 \ tvtohz.9 \ ucred.9 \ uidinfo.9 \ uio.9 \ unr.9 \ utopia.9 \ vaccess.9 \ vaccess_acl_nfs4.9 \ vaccess_acl_posix1e.9 \ vcount.9 \ vflush.9 \ VFS.9 \ vfs_busy.9 \ VFS_CHECKEXP.9 \ vfsconf.9 \ VFS_FHTOVP.9 \ vfs_getnewfsid.9 \ vfs_getopt.9 \ vfs_getvfs.9 \ VFS_MOUNT.9 \ vfs_mountedfrom.9 \ VFS_QUOTACTL.9 \ VFS_ROOT.9 \ vfs_rootmountalloc.9 \ VFS_SET.9 \ VFS_STATFS.9 \ vfs_suser.9 \ VFS_SYNC.9 \ vfs_timestamp.9 \ vfs_unbusy.9 \ VFS_UNMOUNT.9 \ vfs_unmountall.9 \ VFS_VGET.9 \ vget.9 \ vgone.9 \ vhold.9 \ vinvalbuf.9 \ vm_fault_prefault.9 \ vm_map.9 \ vm_map_check_protection.9 \ vm_map_create.9 \ vm_map_delete.9 \ vm_map_entry_resize_free.9 \ vm_map_find.9 \ vm_map_findspace.9 \ vm_map_inherit.9 \ vm_map_init.9 \ vm_map_insert.9 \ vm_map_lock.9 \ vm_map_lookup.9 \ vm_map_madvise.9 \ vm_map_max.9 \ vm_map_protect.9 \ vm_map_remove.9 \ vm_map_simplify_entry.9 \ vm_map_stack.9 \ vm_map_submap.9 \ vm_map_sync.9 \ vm_map_wire.9 \ vm_page_alloc.9 \ vm_page_bits.9 \ vm_page_busy.9 \ vm_page_cache.9 \ vm_page_deactivate.9 \ vm_page_dontneed.9 \ vm_page_aflag.9 \ vm_page_free.9 \ vm_page_grab.9 \ vm_page_hold.9 \ vm_page_insert.9 \ vm_page_lookup.9 \ vm_page_rename.9 \ vm_page_wire.9 \ vm_set_page_size.9 \ vmem.9 \ vn_fullpath.9 \ vn_isdisk.9 \ vnet.9 \ vnode.9 \ VOP_ACCESS.9 \ VOP_ACLCHECK.9 \ VOP_ADVISE.9 \ VOP_ADVLOCK.9 \ VOP_ALLOCATE.9 \ VOP_ATTRIB.9 \ VOP_BWRITE.9 \ VOP_CREATE.9 \ VOP_FSYNC.9 \ VOP_GETACL.9 \ VOP_GETEXTATTR.9 \ VOP_GETPAGES.9 \ VOP_INACTIVE.9 \ VOP_IOCTL.9 \ VOP_LINK.9 \ VOP_LISTEXTATTR.9 \ VOP_LOCK.9 \ VOP_LOOKUP.9 \ VOP_OPENCLOSE.9 \ VOP_PATHCONF.9 \ VOP_PRINT.9 \ VOP_RDWR.9 \ VOP_READDIR.9 \ VOP_READLINK.9 \ VOP_REALLOCBLKS.9 \ VOP_REMOVE.9 \ VOP_RENAME.9 \ VOP_REVOKE.9 \ VOP_SETACL.9 \ VOP_SETEXTATTR.9 \ VOP_STRATEGY.9 \ VOP_VPTOCNP.9 \ VOP_VPTOFH.9 \ vref.9 \ vrefcnt.9 \ vrele.9 \ vslock.9 \ watchdog.9 \ zone.9 MLINKS= unr.9 alloc_unr.9 \ unr.9 alloc_unrl.9 \ unr.9 alloc_unr_specific.9 \ unr.9 delete_unrhdr.9 \ unr.9 free_unr.9 \ unr.9 new_unrhdr.9 MLINKS+=accept_filter.9 accept_filt_add.9 \ accept_filter.9 accept_filt_del.9 \ accept_filter.9 accept_filt_generic_mod_event.9 \ accept_filter.9 accept_filt_get.9 MLINKS+=alq.9 ALQ.9 \ alq.9 alq_close.9 \ alq.9 alq_flush.9 \ alq.9 alq_get.9 \ alq.9 alq_getn.9 \ alq.9 alq_open.9 \ alq.9 alq_open_flags.9 \ alq.9 alq_post.9 \ alq.9 alq_post_flags.9 \ alq.9 alq_write.9 \ alq.9 alq_writen.9 MLINKS+=altq.9 ALTQ.9 MLINKS+=atomic.9 atomic_add.9 \ atomic.9 atomic_clear.9 \ atomic.9 atomic_cmpset.9 \ atomic.9 atomic_fetchadd.9 \ atomic.9 atomic_load.9 \ atomic.9 atomic_readandclear.9 \ atomic.9 atomic_set.9 \ atomic.9 atomic_store.9 \ atomic.9 atomic_subtract.9 \ atomic.9 atomic_swap.9 \ atomic.9 atomic_testandset.9 MLINKS+=bitset.9 BITSET_DEFINE.9 \ bitset.9 BITSET_T_INITIALIZER.9 \ bitset.9 BITSET_FSET.9 \ bitset.9 BIT_CLR.9 \ bitset.9 BIT_COPY.9 \ bitset.9 BIT_ISSET.9 \ bitset.9 BIT_SET.9 \ bitset.9 BIT_ZERO.9 \ bitset.9 BIT_FILL.9 \ bitset.9 BIT_SETOF.9 \ bitset.9 BIT_EMPTY.9 \ bitset.9 BIT_ISFULLSET.9 \ bitset.9 BIT_FFS.9 \ bitset.9 BIT_COUNT.9 \ bitset.9 BIT_SUBSET.9 \ bitset.9 BIT_OVERLAP.9 \ bitset.9 BIT_CMP.9 \ bitset.9 BIT_OR.9 \ bitset.9 BIT_AND.9 \ bitset.9 BIT_NAND.9 \ bitset.9 BIT_CLR_ATOMIC.9 \ bitset.9 BIT_SET_ATOMIC.9 \ bitset.9 BIT_SET_ATOMIC_ACQ.9 \ bitset.9 BIT_AND_ATOMIC.9 \ bitset.9 BIT_OR_ATOMIC.9 \ bitset.9 BIT_COPY_STORE_REL.9 MLINKS+=bpf.9 bpfattach.9 \ bpf.9 bpfattach2.9 \ bpf.9 bpfdetach.9 \ bpf.9 bpf_filter.9 \ bpf.9 bpf_mtap.9 \ bpf.9 bpf_mtap2.9 \ bpf.9 bpf_tap.9 \ bpf.9 bpf_validate.9 MLINKS+=buf.9 bp.9 MLINKS+=buf_ring.9 buf_ring_alloc.9 \ buf_ring.9 buf_ring_free.9 \ buf_ring.9 buf_ring_enqueue.9 \ buf_ring.9 buf_ring_enqueue_bytes.9 \ buf_ring.9 buf_ring_dequeue_mc.9 \ buf_ring.9 buf_ring_dequeue_sc.9 \ buf_ring.9 buf_ring_count.9 \ buf_ring.9 buf_ring_empty.9 \ buf_ring.9 buf_ring_full.9 \ buf_ring.9 buf_ring_peek.9 MLINKS+=bus_activate_resource.9 bus_deactivate_resource.9 MLINKS+=bus_alloc_resource.9 bus_alloc_resource_any.9 MLINKS+=BUS_BIND_INTR.9 bus_bind_intr.9 MLINKS+=BUS_DESCRIBE_INTR.9 bus_describe_intr.9 MLINKS+=bus_dma.9 busdma.9 \ bus_dma.9 bus_dmamap_create.9 \ bus_dma.9 bus_dmamap_destroy.9 \ bus_dma.9 bus_dmamap_load.9 \ bus_dma.9 bus_dmamap_load_bio.9 \ bus_dma.9 bus_dmamap_load_ccb.9 \ bus_dma.9 bus_dmamap_load_mbuf.9 \ bus_dma.9 bus_dmamap_load_mbuf_sg.9 \ bus_dma.9 bus_dmamap_load_uio.9 \ bus_dma.9 bus_dmamap_sync.9 \ bus_dma.9 bus_dmamap_unload.9 \ bus_dma.9 bus_dmamem_alloc.9 \ bus_dma.9 bus_dmamem_free.9 \ bus_dma.9 bus_dma_tag_create.9 \ bus_dma.9 bus_dma_tag_destroy.9 MLINKS+=bus_generic_read_ivar.9 bus_generic_write_ivar.9 MLINKS+=BUS_GET_CPUS.9 bus_get_cpus.9 MLINKS+=bus_map_resource.9 bus_unmap_resource.9 \ bus_map_resource.9 resource_init_map_request.9 MLINKS+=BUS_READ_IVAR.9 BUS_WRITE_IVAR.9 MLINKS+=BUS_SETUP_INTR.9 bus_setup_intr.9 \ BUS_SETUP_INTR.9 BUS_TEARDOWN_INTR.9 \ BUS_SETUP_INTR.9 bus_teardown_intr.9 MLINKS+=bus_space.9 bus_space_alloc.9 \ bus_space.9 bus_space_barrier.9 \ bus_space.9 bus_space_copy_region_1.9 \ bus_space.9 bus_space_copy_region_2.9 \ bus_space.9 bus_space_copy_region_4.9 \ bus_space.9 bus_space_copy_region_8.9 \ bus_space.9 bus_space_copy_region_stream_1.9 \ bus_space.9 bus_space_copy_region_stream_2.9 \ bus_space.9 bus_space_copy_region_stream_4.9 \ bus_space.9 bus_space_copy_region_stream_8.9 \ bus_space.9 bus_space_free.9 \ bus_space.9 bus_space_map.9 \ bus_space.9 bus_space_read_1.9 \ bus_space.9 bus_space_read_2.9 \ bus_space.9 bus_space_read_4.9 \ bus_space.9 bus_space_read_8.9 \ bus_space.9 bus_space_read_multi_1.9 \ bus_space.9 bus_space_read_multi_2.9 \ bus_space.9 bus_space_read_multi_4.9 \ bus_space.9 bus_space_read_multi_8.9 \ bus_space.9 bus_space_read_multi_stream_1.9 \ bus_space.9 bus_space_read_multi_stream_2.9 \ bus_space.9 bus_space_read_multi_stream_4.9 \ bus_space.9 bus_space_read_multi_stream_8.9 \ bus_space.9 bus_space_read_region_1.9 \ bus_space.9 bus_space_read_region_2.9 \ bus_space.9 bus_space_read_region_4.9 \ bus_space.9 bus_space_read_region_8.9 \ bus_space.9 bus_space_read_region_stream_1.9 \ bus_space.9 bus_space_read_region_stream_2.9 \ bus_space.9 bus_space_read_region_stream_4.9 \ bus_space.9 bus_space_read_region_stream_8.9 \ bus_space.9 bus_space_read_stream_1.9 \ bus_space.9 bus_space_read_stream_2.9 \ bus_space.9 bus_space_read_stream_4.9 \ bus_space.9 bus_space_read_stream_8.9 \ bus_space.9 bus_space_set_multi_1.9 \ bus_space.9 bus_space_set_multi_2.9 \ bus_space.9 bus_space_set_multi_4.9 \ bus_space.9 bus_space_set_multi_8.9 \ bus_space.9 bus_space_set_multi_stream_1.9 \ bus_space.9 bus_space_set_multi_stream_2.9 \ bus_space.9 bus_space_set_multi_stream_4.9 \ bus_space.9 bus_space_set_multi_stream_8.9 \ bus_space.9 bus_space_set_region_1.9 \ bus_space.9 bus_space_set_region_2.9 \ bus_space.9 bus_space_set_region_4.9 \ bus_space.9 bus_space_set_region_8.9 \ bus_space.9 bus_space_set_region_stream_1.9 \ bus_space.9 bus_space_set_region_stream_2.9 \ bus_space.9 bus_space_set_region_stream_4.9 \ bus_space.9 bus_space_set_region_stream_8.9 \ bus_space.9 bus_space_subregion.9 \ bus_space.9 bus_space_unmap.9 \ bus_space.9 bus_space_write_1.9 \ bus_space.9 bus_space_write_2.9 \ bus_space.9 bus_space_write_4.9 \ bus_space.9 bus_space_write_8.9 \ bus_space.9 bus_space_write_multi_1.9 \ bus_space.9 bus_space_write_multi_2.9 \ bus_space.9 bus_space_write_multi_4.9 \ bus_space.9 bus_space_write_multi_8.9 \ bus_space.9 bus_space_write_multi_stream_1.9 \ bus_space.9 bus_space_write_multi_stream_2.9 \ bus_space.9 bus_space_write_multi_stream_4.9 \ bus_space.9 bus_space_write_multi_stream_8.9 \ bus_space.9 bus_space_write_region_1.9 \ bus_space.9 bus_space_write_region_2.9 \ bus_space.9 bus_space_write_region_4.9 \ bus_space.9 bus_space_write_region_8.9 \ bus_space.9 bus_space_write_region_stream_1.9 \ bus_space.9 bus_space_write_region_stream_2.9 \ bus_space.9 bus_space_write_region_stream_4.9 \ bus_space.9 bus_space_write_region_stream_8.9 \ bus_space.9 bus_space_write_stream_1.9 \ bus_space.9 bus_space_write_stream_2.9 \ bus_space.9 bus_space_write_stream_4.9 \ bus_space.9 bus_space_write_stream_8.9 MLINKS+=byteorder.9 be16dec.9 \ byteorder.9 be16enc.9 \ byteorder.9 be16toh.9 \ byteorder.9 be32dec.9 \ byteorder.9 be32enc.9 \ byteorder.9 be32toh.9 \ byteorder.9 be64dec.9 \ byteorder.9 be64enc.9 \ byteorder.9 be64toh.9 \ byteorder.9 bswap16.9 \ byteorder.9 bswap32.9 \ byteorder.9 bswap64.9 \ byteorder.9 htobe16.9 \ byteorder.9 htobe32.9 \ byteorder.9 htobe64.9 \ byteorder.9 htole16.9 \ byteorder.9 htole32.9 \ byteorder.9 htole64.9 \ byteorder.9 le16dec.9 \ byteorder.9 le16enc.9 \ byteorder.9 le16toh.9 \ byteorder.9 le32dec.9 \ byteorder.9 le32enc.9 \ byteorder.9 le32toh.9 \ byteorder.9 le64dec.9 \ byteorder.9 le64enc.9 \ byteorder.9 le64toh.9 MLINKS+=condvar.9 cv_broadcast.9 \ condvar.9 cv_broadcastpri.9 \ condvar.9 cv_destroy.9 \ condvar.9 cv_init.9 \ condvar.9 cv_signal.9 \ condvar.9 cv_timedwait.9 \ condvar.9 cv_timedwait_sig.9 \ condvar.9 cv_timedwait_sig_sbt.9 \ condvar.9 cv_wait.9 \ condvar.9 cv_wait_sig.9 \ condvar.9 cv_wait_unlock.9 \ condvar.9 cv_wmesg.9 MLINKS+=config_intrhook.9 config_intrhook_disestablish.9 \ config_intrhook.9 config_intrhook_establish.9 MLINKS+=contigmalloc.9 contigfree.9 MLINKS+=casuword.9 casueword.9 \ casuword.9 casueword32.9 \ casuword.9 casuword32.9 MLINKS+=copy.9 copyin.9 \ copy.9 copyin_nofault.9 \ copy.9 copyinstr.9 \ copy.9 copyout.9 \ copy.9 copyout_nofault.9 \ copy.9 copystr.9 MLINKS+=counter.9 counter_u64_alloc.9 \ counter.9 counter_u64_free.9 \ counter.9 counter_u64_add.9 \ counter.9 counter_enter.9 \ counter.9 counter_exit.9 \ counter.9 counter_u64_add_protected.9 \ counter.9 counter_u64_fetch.9 \ counter.9 counter_u64_zero.9 \ counter.9 SYSCTL_COUNTER_U64.9 \ counter.9 SYSCTL_ADD_COUNTER_U64.9 \ counter.9 SYSCTL_COUNTER_U64_ARRAY.9 \ counter.9 SYSCTL_ADD_COUNTER_U64_ARRAY.9 MLINKS+=cpuset.9 CPUSET_T_INITIALIZER.9 \ cpuset.9 CPUSET_FSET.9 \ cpuset.9 CPU_CLR.9 \ cpuset.9 CPU_COPY.9 \ cpuset.9 CPU_ISSET.9 \ cpuset.9 CPU_SET.9 \ cpuset.9 CPU_ZERO.9 \ cpuset.9 CPU_FILL.9 \ cpuset.9 CPU_SETOF.9 \ cpuset.9 CPU_EMPTY.9 \ cpuset.9 CPU_ISFULLSET.9 \ cpuset.9 CPU_FFS.9 \ cpuset.9 CPU_COUNT.9 \ cpuset.9 CPU_SUBSET.9 \ cpuset.9 CPU_OVERLAP.9 \ cpuset.9 CPU_CMP.9 \ cpuset.9 CPU_OR.9 \ cpuset.9 CPU_AND.9 \ cpuset.9 CPU_NAND.9 \ cpuset.9 CPU_CLR_ATOMIC.9 \ cpuset.9 CPU_SET_ATOMIC.9 \ cpuset.9 CPU_SET_ATOMIC_ACQ.9 \ cpuset.9 CPU_AND_ATOMIC.9 \ cpuset.9 CPU_OR_ATOMIC.9 \ cpuset.9 CPU_COPY_STORE_REL.9 MLINKS+=critical_enter.9 critical.9 \ critical_enter.9 critical_exit.9 MLINKS+=crypto.9 crypto_dispatch.9 \ crypto.9 crypto_done.9 \ crypto.9 crypto_freereq.9 \ crypto.9 crypto_freesession.9 \ crypto.9 crypto_get_driverid.9 \ crypto.9 crypto_getreq.9 \ crypto.9 crypto_kdispatch.9 \ crypto.9 crypto_kdone.9 \ crypto.9 crypto_kregister.9 \ crypto.9 crypto_newsession.9 \ crypto.9 crypto_register.9 \ crypto.9 crypto_unblock.9 \ crypto.9 crypto_unregister.9 \ crypto.9 crypto_unregister_all.9 MLINKS+=DB_COMMAND.9 DB_SHOW_ALL_COMMAND.9 \ DB_COMMAND.9 DB_SHOW_COMMAND.9 MLINKS+=dev_clone.9 drain_dev_clone_events.9 MLINKS+=devfs_set_cdevpriv.9 devfs_clear_cdevpriv.9 \ devfs_set_cdevpriv.9 devfs_get_cdevpriv.9 MLINKS+=device_add_child.9 device_add_child_ordered.9 MLINKS+=device_enable.9 device_disable.9 \ device_enable.9 device_is_enabled.9 MLINKS+=device_get_ivars.9 device_set_ivars.9 MLINKS+=device_get_name.9 device_get_nameunit.9 MLINKS+=device_get_state.9 device_busy.9 \ device_get_state.9 device_is_alive.9 \ device_get_state.9 device_is_attached.9 \ device_get_state.9 device_unbusy.9 MLINKS+=device_get_sysctl.9 device_get_sysctl_ctx.9 \ device_get_sysctl.9 device_get_sysctl_tree.9 MLINKS+=device_quiet.9 device_is_quiet.9 \ device_quiet.9 device_verbose.9 MLINKS+=device_set_desc.9 device_get_desc.9 \ device_set_desc.9 device_set_desc_copy.9 MLINKS+=device_set_flags.9 device_get_flags.9 MLINKS+=devstat.9 devicestat.9 \ devstat.9 devstat_add_entry.9 \ devstat.9 devstat_end_transaction.9 \ devstat.9 devstat_remove_entry.9 \ devstat.9 devstat_start_transaction.9 MLINKS+=disk.9 disk_alloc.9 \ disk.9 disk_create.9 \ disk.9 disk_destroy.9 \ disk.9 disk_gone.9 \ disk.9 disk_resize.9 MLINKS+=domain.9 DOMAIN_SET.9 \ domain.9 domain_add.9 \ domain.9 pfctlinput.9 \ domain.9 pfctlinput2.9 \ domain.9 pffinddomain.9 \ domain.9 pffindproto.9 \ domain.9 pffindtype.9 MLINKS+=drbr.9 drbr_free.9 \ drbr.9 drbr_enqueue.9 \ drbr.9 drbr_dequeue.9 \ drbr.9 drbr_dequeue_cond.9 \ drbr.9 drbr_flush.9 \ drbr.9 drbr_empty.9 \ drbr.9 drbr_inuse.9 \ drbr.9 drbr_stats_update.9 MLINKS+=DRIVER_MODULE.9 DRIVER_MODULE_ORDERED.9 \ DRIVER_MODULE.9 EARLY_DRIVER_MODULE.9 \ DRIVER_MODULE.9 EARLY_DRIVER_MODULE_ORDERED.9 MLINKS+=EVENTHANDLER.9 EVENTHANDLER_DECLARE.9 \ EVENTHANDLER.9 EVENTHANDLER_DEREGISTER.9 \ EVENTHANDLER.9 eventhandler_deregister.9 \ EVENTHANDLER.9 eventhandler_find_list.9 \ EVENTHANDLER.9 EVENTHANDLER_INVOKE.9 \ EVENTHANDLER.9 eventhandler_prune_list.9 \ EVENTHANDLER.9 EVENTHANDLER_REGISTER.9 \ EVENTHANDLER.9 eventhandler_register.9 MLINKS+=eventtimers.9 et_register.9 \ eventtimers.9 et_deregister.9 \ eventtimers.9 et_ban.9 \ eventtimers.9 et_find.9 \ eventtimers.9 et_free.9 \ eventtimers.9 et_init.9 \ eventtimers.9 ET_LOCK.9 \ eventtimers.9 ET_UNLOCK.9 \ eventtimers.9 et_start.9 \ eventtimers.9 et_stop.9 MLINKS+=fail.9 KFAIL_POINT_CODE.9 \ fail.9 KFAIL_POINT_ERROR.9 \ fail.9 KFAIL_POINT_GOTO.9 \ fail.9 KFAIL_POINT_RETURN.9 \ fail.9 KFAIL_POINT_RETURN_VOID.9 MLINKS+=fetch.9 fubyte.9 \ fetch.9 fuswintr.9 \ fetch.9 fuword.9 \ fetch.9 fuword16.9 \ fetch.9 fuword32.9 \ fetch.9 fuword64.9 \ fetch.9 fueword.9 \ fetch.9 fueword32.9 \ fetch.9 fueword64.9 MLINKS+=firmware.9 firmware_get.9 \ firmware.9 firmware_put.9 \ firmware.9 firmware_register.9 \ firmware.9 firmware_unregister.9 MLINKS+=fpu_kern.9 fpu_kern_alloc_ctx.9 \ fpu_kern.9 fpu_kern_free_ctx.9 \ fpu_kern.9 fpu_kern_enter.9 \ fpu_kern.9 fpu_kern_leave.9 \ fpu_kern.9 fpu_kern_thread.9 \ fpu_kern.9 is_fpu_kern_thread.9 MLINKS+=g_attach.9 g_detach.9 MLINKS+=g_bio.9 g_alloc_bio.9 \ g_bio.9 g_clone_bio.9 \ g_bio.9 g_destroy_bio.9 \ g_bio.9 g_duplicate_bio.9 \ g_bio.9 g_new_bio.9 \ g_bio.9 g_print_bio.9 \ g_bio.9 g_reset_bio.9 MLINKS+=g_consumer.9 g_destroy_consumer.9 \ g_consumer.9 g_new_consumer.9 MLINKS+=g_data.9 g_read_data.9 \ g_data.9 g_write_data.9 MLINKS+=getenv.9 freeenv.9 \ getenv.9 getenv_int.9 \ getenv.9 getenv_long.9 \ getenv.9 getenv_string.9 \ getenv.9 getenv_quad.9 \ getenv.9 getenv_uint.9 \ getenv.9 getenv_ulong.9 \ getenv.9 setenv.9 \ getenv.9 testenv.9 \ getenv.9 unsetenv.9 MLINKS+=g_event.9 g_cancel_event.9 \ g_event.9 g_post_event.9 \ g_event.9 g_waitfor_event.9 MLINKS+=g_geom.9 g_destroy_geom.9 \ g_geom.9 g_new_geomf.9 MLINKS+=g_provider.9 g_destroy_provider.9 \ g_provider.9 g_error_provider.9 \ g_provider.9 g_new_providerf.9 MLINKS+=hash.9 hash32.9 \ hash.9 hash32_buf.9 \ hash.9 hash32_str.9 \ hash.9 hash32_stre.9 \ hash.9 hash32_strn.9 \ hash.9 hash32_strne.9 \ hash.9 jenkins_hash.9 \ hash.9 jenkins_hash32.9 MLINKS+=hashinit.9 hashdestroy.9 \ hashinit.9 hashinit_flags.9 \ hashinit.9 phashinit.9 MLINKS+=hhook.9 hhook_head_register.9 \ hhook.9 hhook_head_deregister.9 \ hhook.9 hhook_head_deregister_lookup.9 \ hhook.9 hhook_run_hooks.9 \ hhook.9 HHOOKS_RUN_IF.9 \ hhook.9 HHOOKS_RUN_LOOKUP_IF.9 MLINKS+=ieee80211.9 ieee80211_ifattach.9 \ ieee80211.9 ieee80211_ifdetach.9 MLINKS+=ieee80211_amrr.9 ieee80211_amrr_choose.9 \ ieee80211_amrr.9 ieee80211_amrr_cleanup.9 \ ieee80211_amrr.9 ieee80211_amrr_init.9 \ ieee80211_amrr.9 ieee80211_amrr_node_init.9 \ ieee80211_amrr.9 ieee80211_amrr_setinterval.9 \ ieee80211_amrr.9 ieee80211_amrr_tx_complete.9 \ ieee80211_amrr.9 ieee80211_amrr_tx_update.9 MLINKS+=ieee80211_beacon.9 ieee80211_beacon_alloc.9 \ ieee80211_beacon.9 ieee80211_beacon_notify.9 \ ieee80211_beacon.9 ieee80211_beacon_update.9 MLINKS+=ieee80211_bmiss.9 ieee80211_beacon_miss.9 MLINKS+=ieee80211_crypto.9 ieee80211_crypto_available.9 \ ieee80211_crypto.9 ieee80211_crypto_decap.9 \ ieee80211_crypto.9 ieee80211_crypto_delglobalkeys.9 \ ieee80211_crypto.9 ieee80211_crypto_delkey.9 \ ieee80211_crypto.9 ieee80211_crypto_demic.9 \ ieee80211_crypto.9 ieee80211_crypto_encap.9 \ ieee80211_crypto.9 ieee80211_crypto_enmic.9 \ ieee80211_crypto.9 ieee80211_crypto_newkey.9 \ ieee80211_crypto.9 ieee80211_crypto_register.9 \ ieee80211_crypto.9 ieee80211_crypto_reload_keys.9 \ ieee80211_crypto.9 ieee80211_crypto_setkey.9 \ ieee80211_crypto.9 ieee80211_crypto_unregister.9 \ ieee80211_crypto.9 ieee80211_key_update_begin.9 \ ieee80211_crypto.9 ieee80211_key_update_end.9 \ ieee80211_crypto.9 ieee80211_notify_michael_failure.9 \ ieee80211_crypto.9 ieee80211_notify_replay_failure.9 MLINKS+=ieee80211_input.9 ieee80211_input_all.9 MLINKS+=ieee80211_node.9 ieee80211_dump_node.9 \ ieee80211_node.9 ieee80211_dump_nodes.9 \ ieee80211_node.9 ieee80211_find_rxnode.9 \ ieee80211_node.9 ieee80211_find_rxnode_withkey.9 \ ieee80211_node.9 ieee80211_free_node.9 \ ieee80211_node.9 ieee80211_iterate_nodes.9 \ ieee80211_node.9 ieee80211_ref_node.9 \ ieee80211_node.9 ieee80211_unref_node.9 MLINKS+=ieee80211_output.9 ieee80211_process_callback.9 \ ieee80211_output.9 M_SEQNO_GET.9 \ ieee80211_output.9 M_WME_GETAC.9 MLINKS+=ieee80211_proto.9 ieee80211_new_state.9 \ ieee80211_proto.9 ieee80211_resume_all.9 \ ieee80211_proto.9 ieee80211_start_all.9 \ ieee80211_proto.9 ieee80211_stop_all.9 \ ieee80211_proto.9 ieee80211_suspend_all.9 \ ieee80211_proto.9 ieee80211_waitfor_parent.9 MLINKS+=ieee80211_radiotap.9 ieee80211_radiotap_active.9 \ ieee80211_radiotap.9 ieee80211_radiotap_active_vap.9 \ ieee80211_radiotap.9 ieee80211_radiotap_attach.9 \ ieee80211_radiotap.9 ieee80211_radiotap_tx.9 \ ieee80211_radiotap.9 radiotap.9 MLINKS+=ieee80211_regdomain.9 ieee80211_alloc_countryie.9 \ ieee80211_regdomain.9 ieee80211_init_channels.9 \ ieee80211_regdomain.9 ieee80211_sort_channels.9 MLINKS+=ieee80211_scan.9 ieee80211_add_scan.9 \ ieee80211_scan.9 ieee80211_bg_scan.9 \ ieee80211_scan.9 ieee80211_cancel_scan.9 \ ieee80211_scan.9 ieee80211_cancel_scan_any.9 \ ieee80211_scan.9 ieee80211_check_scan.9 \ ieee80211_scan.9 ieee80211_check_scan_current.9 \ ieee80211_scan.9 ieee80211_flush.9 \ ieee80211_scan.9 ieee80211_probe_curchan.9 \ ieee80211_scan.9 ieee80211_scan_assoc_fail.9 \ ieee80211_scan.9 ieee80211_scan_done.9 \ ieee80211_scan.9 ieee80211_scan_dump_channels.9 \ ieee80211_scan.9 ieee80211_scan_flush.9 \ ieee80211_scan.9 ieee80211_scan_iterate.9 \ ieee80211_scan.9 ieee80211_scan_next.9 \ ieee80211_scan.9 ieee80211_scan_timeout.9 \ ieee80211_scan.9 ieee80211_scanner_get.9 \ ieee80211_scan.9 ieee80211_scanner_register.9 \ ieee80211_scan.9 ieee80211_scanner_unregister.9 \ ieee80211_scan.9 ieee80211_scanner_unregister_all.9 \ ieee80211_scan.9 ieee80211_start_scan.9 MLINKS+=ieee80211_vap.9 ieee80211_vap_attach.9 \ ieee80211_vap.9 ieee80211_vap_detach.9 \ ieee80211_vap.9 ieee80211_vap_setup.9 MLINKS+=ifnet.9 if_addmulti.9 \ ifnet.9 if_alloc.9 \ ifnet.9 if_allmulti.9 \ ifnet.9 if_attach.9 \ ifnet.9 if_data.9 \ ifnet.9 IF_DEQUEUE.9 \ ifnet.9 if_delmulti.9 \ ifnet.9 if_detach.9 \ ifnet.9 if_down.9 \ ifnet.9 if_findmulti.9 \ ifnet.9 if_free.9 \ ifnet.9 if_free_type.9 \ ifnet.9 if_up.9 \ ifnet.9 ifa_free.9 \ ifnet.9 ifa_ifwithaddr.9 \ ifnet.9 ifa_ifwithdstaddr.9 \ ifnet.9 ifa_ifwithnet.9 \ ifnet.9 ifa_ref.9 \ ifnet.9 ifaddr.9 \ ifnet.9 ifaddr_byindex.9 \ ifnet.9 ifaof_ifpforaddr.9 \ ifnet.9 ifioctl.9 \ ifnet.9 ifpromisc.9 \ ifnet.9 ifqueue.9 \ ifnet.9 ifunit.9 \ ifnet.9 ifunit_ref.9 MLINKS+=insmntque.9 insmntque1.9 MLINKS+=ithread.9 ithread_add_handler.9 \ ithread.9 ithread_create.9 \ ithread.9 ithread_destroy.9 \ ithread.9 ithread_priority.9 \ ithread.9 ithread_remove_handler.9 \ ithread.9 ithread_schedule.9 MLINKS+=kernacc.9 useracc.9 MLINKS+=kernel_mount.9 free_mntarg.9 \ kernel_mount.9 kernel_vmount.9 \ kernel_mount.9 mount_arg.9 \ kernel_mount.9 mount_argb.9 \ kernel_mount.9 mount_argf.9 \ kernel_mount.9 mount_argsu.9 MLINKS+=khelp.9 khelp_add_hhook.9 \ khelp.9 KHELP_DECLARE_MOD.9 \ khelp.9 KHELP_DECLARE_MOD_UMA.9 \ khelp.9 khelp_destroy_osd.9 \ khelp.9 khelp_get_id.9 \ khelp.9 khelp_get_osd.9 \ khelp.9 khelp_init_osd.9 \ khelp.9 khelp_remove_hhook.9 MLINKS+=kobj.9 DEFINE_CLASS.9 \ kobj.9 kobj_class_compile.9 \ kobj.9 kobj_class_compile_static.9 \ kobj.9 kobj_class_free.9 \ kobj.9 kobj_create.9 \ kobj.9 kobj_delete.9 \ kobj.9 kobj_init.9 \ kobj.9 kobj_init_static.9 MLINKS+=kproc.9 kproc_create.9 \ kproc.9 kproc_exit.9 \ kproc.9 kproc_kthread_add.9 \ kproc.9 kproc_resume.9 \ kproc.9 kproc_shutdown.9 \ kproc.9 kproc_start.9 \ kproc.9 kproc_suspend.9 \ kproc.9 kproc_suspend_check.9 \ kproc.9 kthread_create.9 MLINKS+=kqueue.9 knlist_add.9 \ kqueue.9 knlist_clear.9 \ kqueue.9 knlist_delete.9 \ kqueue.9 knlist_destroy.9 \ kqueue.9 knlist_empty.9 \ kqueue.9 knlist_init.9 \ kqueue.9 knlist_init_mtx.9 \ kqueue.9 knlist_init_rw_reader.9 \ kqueue.9 knlist_remove.9 \ kqueue.9 knlist_remove_inevent.9 \ kqueue.9 knote_fdclose.9 \ kqueue.9 KNOTE_LOCKED.9 \ kqueue.9 KNOTE_UNLOCKED.9 \ kqueue.9 kqfd_register.9 \ kqueue.9 kqueue_add_filteropts.9 \ kqueue.9 kqueue_del_filteropts.9 MLINKS+=kthread.9 kthread_add.9 \ kthread.9 kthread_exit.9 \ kthread.9 kthread_resume.9 \ kthread.9 kthread_shutdown.9 \ kthread.9 kthread_start.9 \ kthread.9 kthread_suspend.9 \ kthread.9 kthread_suspend_check.9 MLINKS+=ktr.9 CTR0.9 \ ktr.9 CTR1.9 \ ktr.9 CTR2.9 \ ktr.9 CTR3.9 \ ktr.9 CTR4.9 \ ktr.9 CTR5.9 \ ktr.9 CTR6.9 MLINKS+=lock.9 lockdestroy.9 \ lock.9 lockinit.9 \ lock.9 lockmgr.9 \ lock.9 lockmgr_args.9 \ lock.9 lockmgr_args_rw.9 \ lock.9 lockmgr_assert.9 \ lock.9 lockmgr_disown.9 \ lock.9 lockmgr_printinfo.9 \ lock.9 lockmgr_recursed.9 \ lock.9 lockmgr_rw.9 \ lock.9 lockmgr_waiters.9 \ lock.9 lockstatus.9 MLINKS+=LOCK_PROFILING.9 MUTEX_PROFILING.9 MLINKS+=make_dev.9 destroy_dev.9 \ make_dev.9 destroy_dev_drain.9 \ make_dev.9 destroy_dev_sched.9 \ make_dev.9 destroy_dev_sched_cb.9 \ make_dev.9 dev_depends.9 \ make_dev.9 make_dev_alias.9 \ make_dev.9 make_dev_alias_p.9 \ make_dev.9 make_dev_cred.9 \ make_dev.9 make_dev_credf.9 \ make_dev.9 make_dev_p.9 \ make_dev.9 make_dev_s.9 MLINKS+=malloc.9 free.9 \ malloc.9 MALLOC_DECLARE.9 \ malloc.9 MALLOC_DEFINE.9 \ malloc.9 realloc.9 \ malloc.9 reallocf.9 MLINKS+=mbchain.9 mb_detach.9 \ mbchain.9 mb_done.9 \ mbchain.9 mb_fixhdr.9 \ mbchain.9 mb_init.9 \ mbchain.9 mb_initm.9 \ mbchain.9 mb_put_int64be.9 \ mbchain.9 mb_put_int64le.9 \ mbchain.9 mb_put_mbuf.9 \ mbchain.9 mb_put_mem.9 \ mbchain.9 mb_put_uint16be.9 \ mbchain.9 mb_put_uint16le.9 \ mbchain.9 mb_put_uint32be.9 \ mbchain.9 mb_put_uint32le.9 \ mbchain.9 mb_put_uint8.9 \ mbchain.9 mb_put_uio.9 \ mbchain.9 mb_reserve.9 MLINKS+=mbpool.9 mbp_alloc.9 \ mbpool.9 mbp_card_free.9 \ mbpool.9 mbp_count.9 \ mbpool.9 mbp_create.9 \ mbpool.9 mbp_destroy.9 \ mbpool.9 mbp_ext_free.9 \ mbpool.9 mbp_free.9 \ mbpool.9 mbp_get.9 \ mbpool.9 mbp_get_keep.9 \ mbpool.9 mbp_sync.9 MLINKS+=\ mbuf.9 m_adj.9 \ mbuf.9 m_align.9 \ mbuf.9 M_ALIGN.9 \ mbuf.9 m_append.9 \ mbuf.9 m_apply.9 \ mbuf.9 m_cat.9 \ mbuf.9 m_catpkt.9 \ mbuf.9 MCHTYPE.9 \ mbuf.9 MCLGET.9 \ mbuf.9 m_collapse.9 \ mbuf.9 m_copyback.9 \ mbuf.9 m_copydata.9 \ mbuf.9 m_copym.9 \ mbuf.9 m_copypacket.9 \ mbuf.9 m_copyup.9 \ mbuf.9 m_defrag.9 \ mbuf.9 m_devget.9 \ mbuf.9 m_dup.9 \ mbuf.9 m_dup_pkthdr.9 \ mbuf.9 MEXTADD.9 \ mbuf.9 m_fixhdr.9 \ mbuf.9 m_free.9 \ mbuf.9 m_freem.9 \ mbuf.9 MGET.9 \ mbuf.9 m_get.9 \ mbuf.9 m_get2.9 \ mbuf.9 m_getjcl.9 \ mbuf.9 m_getcl.9 \ mbuf.9 m_getclr.9 \ mbuf.9 MGETHDR.9 \ mbuf.9 m_gethdr.9 \ mbuf.9 m_getm.9 \ mbuf.9 m_getptr.9 \ mbuf.9 MH_ALIGN.9 \ mbuf.9 M_LEADINGSPACE.9 \ mbuf.9 m_length.9 \ mbuf.9 M_MOVE_PKTHDR.9 \ mbuf.9 m_move_pkthdr.9 \ mbuf.9 M_PREPEND.9 \ mbuf.9 m_prepend.9 \ mbuf.9 m_pulldown.9 \ mbuf.9 m_pullup.9 \ mbuf.9 m_split.9 \ mbuf.9 mtod.9 \ mbuf.9 M_TRAILINGSPACE.9 \ mbuf.9 m_unshare.9 \ mbuf.9 M_WRITABLE.9 MLINKS+=\ mbuf_tags.9 m_tag_alloc.9 \ mbuf_tags.9 m_tag_copy.9 \ mbuf_tags.9 m_tag_copy_chain.9 \ mbuf_tags.9 m_tag_delete.9 \ mbuf_tags.9 m_tag_delete_chain.9 \ mbuf_tags.9 m_tag_delete_nonpersistent.9 \ mbuf_tags.9 m_tag_find.9 \ mbuf_tags.9 m_tag_first.9 \ mbuf_tags.9 m_tag_free.9 \ mbuf_tags.9 m_tag_get.9 \ mbuf_tags.9 m_tag_init.9 \ mbuf_tags.9 m_tag_locate.9 \ mbuf_tags.9 m_tag_next.9 \ mbuf_tags.9 m_tag_prepend.9 \ mbuf_tags.9 m_tag_unlink.9 MLINKS+=MD5.9 MD5Init.9 \ MD5.9 MD5Transform.9 MLINKS+=mdchain.9 md_append_record.9 \ mdchain.9 md_done.9 \ mdchain.9 md_get_int64.9 \ mdchain.9 md_get_int64be.9 \ mdchain.9 md_get_int64le.9 \ mdchain.9 md_get_mbuf.9 \ mdchain.9 md_get_mem.9 \ mdchain.9 md_get_uint16.9 \ mdchain.9 md_get_uint16be.9 \ mdchain.9 md_get_uint16le.9 \ mdchain.9 md_get_uint32.9 \ mdchain.9 md_get_uint32be.9 \ mdchain.9 md_get_uint32le.9 \ mdchain.9 md_get_uint8.9 \ mdchain.9 md_get_uio.9 \ mdchain.9 md_initm.9 \ mdchain.9 md_next_record.9 MLINKS+=microtime.9 bintime.9 \ microtime.9 getbintime.9 \ microtime.9 getmicrotime.9 \ microtime.9 getnanotime.9 \ microtime.9 nanotime.9 MLINKS+=microuptime.9 binuptime.9 \ microuptime.9 getbinuptime.9 \ microuptime.9 getmicrouptime.9 \ microuptime.9 getnanouptime.9 \ microuptime.9 getsbinuptime.9 \ microuptime.9 nanouptime.9 \ microuptime.9 sbinuptime.9 MLINKS+=mi_switch.9 cpu_switch.9 \ mi_switch.9 cpu_throw.9 MLINKS+=mod_cc.9 CCV.9 \ mod_cc.9 DECLARE_CC_MODULE.9 MLINKS+=mtx_pool.9 mtx_pool_alloc.9 \ mtx_pool.9 mtx_pool_create.9 \ mtx_pool.9 mtx_pool_destroy.9 \ mtx_pool.9 mtx_pool_find.9 \ mtx_pool.9 mtx_pool_lock.9 \ mtx_pool.9 mtx_pool_lock_spin.9 \ mtx_pool.9 mtx_pool_unlock.9 \ mtx_pool.9 mtx_pool_unlock_spin.9 MLINKS+=mutex.9 mtx_assert.9 \ mutex.9 mtx_destroy.9 \ mutex.9 mtx_init.9 \ mutex.9 mtx_initialized.9 \ mutex.9 mtx_lock.9 \ mutex.9 mtx_lock_flags.9 \ mutex.9 mtx_lock_spin.9 \ mutex.9 mtx_lock_spin_flags.9 \ mutex.9 mtx_owned.9 \ mutex.9 mtx_recursed.9 \ mutex.9 mtx_sleep.9 \ mutex.9 MTX_SYSINIT.9 \ mutex.9 mtx_trylock.9 \ mutex.9 mtx_trylock_flags.9 \ mutex.9 mtx_unlock.9 \ mutex.9 mtx_unlock_flags.9 \ mutex.9 mtx_unlock_spin.9 \ mutex.9 mtx_unlock_spin_flags.9 MLINKS+=namei.9 NDFREE.9 \ namei.9 NDINIT.9 MLINKS+=netisr.9 netisr_clearqdrops.9 \ netisr.9 netisr_default_flow2cpu.9 \ netisr.9 netisr_dispatch.9 \ netisr.9 netisr_dispatch_src.9 \ netisr.9 netisr_get_cpucount.9 \ netisr.9 netisr_get_cpuid.9 \ netisr.9 netisr_getqdrops.9 \ netisr.9 netisr_getqlimit.9 \ netisr.9 netisr_queue.9 \ netisr.9 netisr_queue_src.9 \ netisr.9 netisr_register.9 \ netisr.9 netisr_setqlimit.9 \ netisr.9 netisr_unregister.9 MLINKS+=nv.9 libnv.9 \ nv.9 nvlist.9 \ nv.9 nvlist_add_binary.9 \ nv.9 nvlist_add_bool.9 \ nv.9 nvlist_add_descriptor.9 \ nv.9 nvlist_add_null.9 \ nv.9 nvlist_add_number.9 \ nv.9 nvlist_add_nvlist.9 \ nv.9 nvlist_add_string.9 \ nv.9 nvlist_add_stringf.9 \ nv.9 nvlist_add_stringv.9 \ nv.9 nvlist_clone.9 \ nv.9 nvlist_create.9 \ nv.9 nvlist_destroy.9 \ nv.9 nvlist_dump.9 \ nv.9 nvlist_empty.9 \ nv.9 nvlist_error.9 \ nv.9 nvlist_exists.9 \ nv.9 nvlist_exists_binary.9 \ nv.9 nvlist_exists_bool.9 \ nv.9 nvlist_exists_descriptor.9 \ nv.9 nvlist_exists_null.9 \ nv.9 nvlist_exists_number.9 \ nv.9 nvlist_exists_nvlist.9 \ nv.9 nvlist_exists_string.9 \ nv.9 nvlist_exists_type.9 \ nv.9 nvlist_fdump.9 \ nv.9 nvlist_flags.9 \ nv.9 nvlist_free.9 \ nv.9 nvlist_free_binary.9 \ nv.9 nvlist_free_bool.9 \ nv.9 nvlist_free_descriptor.9 \ nv.9 nvlist_free_null.9 \ nv.9 nvlist_free_number.9 \ nv.9 nvlist_free_nvlist.9 \ nv.9 nvlist_free_string.9 \ nv.9 nvlist_free_type.9 \ nv.9 nvlist_get_binary.9 \ nv.9 nvlist_get_bool.9 \ nv.9 nvlist_get_descriptor.9 \ nv.9 nvlist_get_number.9 \ nv.9 nvlist_get_nvlist.9 \ nv.9 nvlist_get_parent.9 \ nv.9 nvlist_get_string.9 \ nv.9 nvlist_move_binary.9 \ nv.9 nvlist_move_descriptor.9 \ nv.9 nvlist_move_nvlist.9 \ nv.9 nvlist_move_string.9 \ nv.9 nvlist_next.9 \ nv.9 nvlist_pack.9 \ nv.9 nvlist_recv.9 \ nv.9 nvlist_send.9 \ nv.9 nvlist_set_error.9 \ nv.9 nvlist_size.9 \ nv.9 nvlist_take_binary.9 \ nv.9 nvlist_take_bool.9 \ nv.9 nvlist_take_descriptor.9 \ nv.9 nvlist_take_number.9 \ nv.9 nvlist_take_nvlist.9 \ nv.9 nvlist_take_string.9 \ nv.9 nvlist_unpack.9 \ nv.9 nvlist_xfer.9 MLINKS+=osd.9 osd_call.9 \ osd.9 osd_del.9 \ osd.9 osd_deregister.9 \ osd.9 osd_exit.9 \ osd.9 osd_get.9 \ osd.9 osd_register.9 \ osd.9 osd_set.9 MLINKS+=panic.9 vpanic.9 MLINKS+=pbuf.9 getpbuf.9 \ pbuf.9 relpbuf.9 \ pbuf.9 trypbuf.9 MLINKS+=PCBGROUP.9 in_pcbgroup_byhash.9 \ PCBGROUP.9 in_pcbgroup_byinpcb.9 \ PCBGROUP.9 in_pcbgroup_destroy.9 \ PCBGROUP.9 in_pcbgroup_enabled.9 \ PCBGROUP.9 in_pcbgroup_init.9 \ PCBGROUP.9 in_pcbgroup_remove.9 \ PCBGROUP.9 in_pcbgroup_update.9 \ PCBGROUP.9 in_pcbgroup_update_mbuf.9 \ PCBGROUP.9 in6_pcbgroup_byhash.9 MLINKS+=pci.9 pci_alloc_msi.9 \ pci.9 pci_alloc_msix.9 \ pci.9 pci_disable_busmaster.9 \ pci.9 pci_disable_io.9 \ pci.9 pci_enable_busmaster.9 \ pci.9 pci_enable_io.9 \ pci.9 pci_find_bsf.9 \ pci.9 pci_find_cap.9 \ pci.9 pci_find_dbsf.9 \ pci.9 pci_find_device.9 \ pci.9 pci_find_extcap.9 \ pci.9 pci_find_htcap.9 \ pci.9 pci_find_pcie_root_port.9 \ pci.9 pci_get_id.9 \ pci.9 pci_get_max_read_req.9 \ pci.9 pci_get_powerstate.9 \ pci.9 pci_get_vpd_ident.9 \ pci.9 pci_get_vpd_readonly.9 \ pci.9 pci_iov_attach.9 \ pci.9 pci_iov_detach.9 \ pci.9 pci_msi_count.9 \ pci.9 pci_msix_count.9 \ pci.9 pci_msix_pba_bar.9 \ pci.9 pci_msix_table_bar.9 \ pci.9 pci_pending_msix.9 \ pci.9 pci_read_config.9 \ pci.9 pci_release_msi.9 \ pci.9 pci_remap_msix.9 \ pci.9 pci_restore_state.9 \ pci.9 pci_save_state.9 \ pci.9 pci_set_powerstate.9 \ pci.9 pci_set_max_read_req.9 \ pci.9 pci_write_config.9 \ pci.9 pcie_adjust_config.9 \ pci.9 pcie_read_config.9 \ pci.9 pcie_write_config.9 MLINKS+=pci_iov_schema.9 pci_iov_schema_alloc_node.9 \ pci_iov_schema.9 pci_iov_schema_add_bool.9 \ pci_iov_schema.9 pci_iov_schema_add_string.9 \ pci_iov_schema.9 pci_iov_schema_add_uint8.9 \ pci_iov_schema.9 pci_iov_schema_add_uint16.9 \ pci_iov_schema.9 pci_iov_schema_add_uint32.9 \ pci_iov_schema.9 pci_iov_schema_add_uint64.9 \ pci_iov_schema.9 pci_iov_schema_add_unicast_mac.9 MLINKS+=pfil.9 pfil_add_hook.9 \ pfil.9 pfil_head_register.9 \ pfil.9 pfil_head_unregister.9 \ pfil.9 pfil_hook_get.9 \ pfil.9 pfil_remove_hook.9 \ pfil.9 pfil_rlock.9 \ pfil.9 pfil_run_hooks.9 \ pfil.9 pfil_runlock.9 \ pfil.9 pfil_wlock.9 \ pfil.9 pfil_wunlock.9 MLINKS+=pfind.9 zpfind.9 MLINKS+=PHOLD.9 PRELE.9 \ PHOLD.9 _PHOLD.9 \ PHOLD.9 _PRELE.9 \ PHOLD.9 PROC_ASSERT_HELD.9 \ PHOLD.9 PROC_ASSERT_NOT_HELD.9 MLINKS+=pmap_copy.9 pmap_copy_page.9 MLINKS+=pmap_extract.9 pmap_extract_and_hold.9 MLINKS+=pmap_init.9 pmap_init2.9 MLINKS+=pmap_is_modified.9 pmap_ts_referenced.9 MLINKS+=pmap_pinit.9 pmap_pinit0.9 \ pmap_pinit.9 pmap_pinit2.9 MLINKS+=pmap_qenter.9 pmap_qremove.9 MLINKS+=pmap_quick_enter_page.9 pmap_quick_remove_page.9 MLINKS+=pmap_remove.9 pmap_remove_all.9 \ pmap_remove.9 pmap_remove_pages.9 MLINKS+=pmap_resident_count.9 pmap_wired_count.9 MLINKS+=pmap_zero_page.9 pmap_zero_area.9 \ pmap_zero_page.9 pmap_zero_idle.9 MLINKS+=printf.9 log.9 \ printf.9 tprintf.9 \ printf.9 uprintf.9 MLINKS+=priv.9 priv_check.9 \ priv.9 priv_check_cred.9 MLINKS+=proc_rwmem.9 proc_readmem.9 \ proc_rwmem.9 proc_writemem.9 MLINKS+=psignal.9 gsignal.9 \ psignal.9 pgsignal.9 \ psignal.9 tdsignal.9 MLINKS+=random.9 arc4rand.9 \ random.9 arc4random.9 \ random.9 read_random.9 \ random.9 read_random_uio.9 \ random.9 srandom.9 MLINKS+=refcount.9 refcount_acquire.9 \ refcount.9 refcount_init.9 \ refcount.9 refcount_release.9 MLINKS+=resource_int_value.9 resource_long_value.9 \ resource_int_value.9 resource_string_value.9 MLINKS+=rman.9 rman_activate_resource.9 \ rman.9 rman_adjust_resource.9 \ rman.9 rman_deactivate_resource.9 \ rman.9 rman_fini.9 \ rman.9 rman_first_free_region.9 \ rman.9 rman_get_bushandle.9 \ rman.9 rman_get_bustag.9 \ rman.9 rman_get_device.9 \ rman.9 rman_get_end.9 \ rman.9 rman_get_flags.9 \ rman.9 rman_get_mapping.9 \ rman.9 rman_get_rid.9 \ rman.9 rman_get_size.9 \ rman.9 rman_get_start.9 \ rman.9 rman_get_virtual.9 \ rman.9 rman_init.9 \ rman.9 rman_init_from_resource.9 \ rman.9 rman_is_region_manager.9 \ rman.9 rman_last_free_region.9 \ rman.9 rman_make_alignment_flags.9 \ rman.9 rman_manage_region.9 \ rman.9 rman_release_resource.9 \ rman.9 rman_reserve_resource.9 \ rman.9 rman_reserve_resource_bound.9 \ rman.9 rman_set_bushandle.9 \ rman.9 rman_set_bustag.9 \ rman.9 rman_set_mapping.9 \ rman.9 rman_set_rid.9 \ rman.9 rman_set_virtual.9 MLINKS+=rmlock.9 rm_assert.9 \ rmlock.9 rm_destroy.9 \ rmlock.9 rm_init.9 \ rmlock.9 rm_init_flags.9 \ rmlock.9 rm_rlock.9 \ rmlock.9 rm_runlock.9 \ rmlock.9 rm_sleep.9 \ rmlock.9 RM_SYSINIT.9 \ rmlock.9 rm_try_rlock.9 \ rmlock.9 rm_wlock.9 \ rmlock.9 rm_wowned.9 \ rmlock.9 rm_wunlock.9 MLINKS+=rtalloc.9 rtalloc1.9 \ rtalloc.9 rtalloc_ign.9 \ rtalloc.9 RT_ADDREF.9 \ rtalloc.9 RT_LOCK.9 \ rtalloc.9 RT_REMREF.9 \ rtalloc.9 RT_RTFREE.9 \ rtalloc.9 RT_UNLOCK.9 \ rtalloc.9 RTFREE_LOCKED.9 \ rtalloc.9 RTFREE.9 \ rtalloc.9 rtfree.9 \ rtalloc.9 rtalloc1_fib.9 \ rtalloc.9 rtalloc_ign_fib.9 \ rtalloc.9 rtalloc_fib.9 MLINKS+=runqueue.9 choosethread.9 \ runqueue.9 procrunnable.9 \ runqueue.9 remrunqueue.9 \ runqueue.9 setrunqueue.9 MLINKS+=rwlock.9 rw_assert.9 \ rwlock.9 rw_destroy.9 \ rwlock.9 rw_downgrade.9 \ rwlock.9 rw_init.9 \ rwlock.9 rw_init_flags.9 \ rwlock.9 rw_initialized.9 \ rwlock.9 rw_rlock.9 \ rwlock.9 rw_runlock.9 \ rwlock.9 rw_unlock.9 \ rwlock.9 rw_sleep.9 \ rwlock.9 RW_SYSINIT.9 \ rwlock.9 rw_try_rlock.9 \ rwlock.9 rw_try_upgrade.9 \ rwlock.9 rw_try_wlock.9 \ rwlock.9 rw_wlock.9 \ rwlock.9 rw_wowned.9 \ rwlock.9 rw_wunlock.9 MLINKS+=sbuf.9 sbuf_bcat.9 \ sbuf.9 sbuf_bcopyin.9 \ sbuf.9 sbuf_bcpy.9 \ sbuf.9 sbuf_cat.9 \ sbuf.9 sbuf_clear.9 \ sbuf.9 sbuf_copyin.9 \ sbuf.9 sbuf_cpy.9 \ sbuf.9 sbuf_data.9 \ sbuf.9 sbuf_delete.9 \ sbuf.9 sbuf_done.9 \ sbuf.9 sbuf_error.9 \ sbuf.9 sbuf_finish.9 \ sbuf.9 sbuf_len.9 \ sbuf.9 sbuf_new.9 \ sbuf.9 sbuf_new_auto.9 \ sbuf.9 sbuf_new_for_sysctl.9 \ sbuf.9 sbuf_printf.9 \ sbuf.9 sbuf_putc.9 \ sbuf.9 sbuf_set_drain.9 \ sbuf.9 sbuf_setpos.9 \ sbuf.9 sbuf_start_section.9 \ sbuf.9 sbuf_end_section.9 \ sbuf.9 sbuf_trim.9 \ sbuf.9 sbuf_vprintf.9 MLINKS+=scheduler.9 curpriority_cmp.9 \ scheduler.9 maybe_resched.9 \ scheduler.9 propagate_priority.9 \ scheduler.9 resetpriority.9 \ scheduler.9 roundrobin.9 \ scheduler.9 roundrobin_interval.9 \ scheduler.9 schedclock.9 \ scheduler.9 schedcpu.9 \ scheduler.9 sched_setup.9 \ scheduler.9 setrunnable.9 \ scheduler.9 updatepri.9 MLINKS+=SDT.9 SDT_PROVIDER_DECLARE.9 \ SDT.9 SDT_PROVIDER_DEFINE.9 \ SDT.9 SDT_PROBE_DECLARE.9 \ SDT.9 SDT_PROBE_DEFINE.9 \ SDT.9 SDT_PROBE.9 MLINKS+=securelevel_gt.9 securelevel_ge.9 MLINKS+=selrecord.9 seldrain.9 \ selrecord.9 selwakeup.9 MLINKS+=sema.9 sema_destroy.9 \ sema.9 sema_init.9 \ sema.9 sema_post.9 \ sema.9 sema_timedwait.9 \ sema.9 sema_trywait.9 \ sema.9 sema_value.9 \ sema.9 sema_wait.9 MLINKS+=sf_buf.9 sf_buf_alloc.9 \ sf_buf.9 sf_buf_free.9 \ sf_buf.9 sf_buf_kva.9 \ sf_buf.9 sf_buf_page.9 MLINKS+=sglist.9 sglist_alloc.9 \ sglist.9 sglist_append.9 \ sglist.9 sglist_append_bio.9 \ sglist.9 sglist_append_mbuf.9 \ sglist.9 sglist_append_phys.9 \ sglist.9 sglist_append_uio.9 \ sglist.9 sglist_append_user.9 \ sglist.9 sglist_append_vmpages.9 \ sglist.9 sglist_build.9 \ sglist.9 sglist_clone.9 \ sglist.9 sglist_consume_uio.9 \ sglist.9 sglist_count.9 \ sglist.9 sglist_count_vmpages.9 \ sglist.9 sglist_free.9 \ sglist.9 sglist_hold.9 \ sglist.9 sglist_init.9 \ sglist.9 sglist_join.9 \ sglist.9 sglist_length.9 \ sglist.9 sglist_reset.9 \ sglist.9 sglist_slice.9 \ sglist.9 sglist_split.9 MLINKS+=shm_map.9 shm_unmap.9 MLINKS+=signal.9 cursig.9 \ signal.9 execsigs.9 \ signal.9 issignal.9 \ signal.9 killproc.9 \ signal.9 pgsigio.9 \ signal.9 postsig.9 \ signal.9 SETSETNEQ.9 \ signal.9 SETSETOR.9 \ signal.9 SIGADDSET.9 \ signal.9 SIG_CONTSIGMASK.9 \ signal.9 SIGDELSET.9 \ signal.9 SIGEMPTYSET.9 \ signal.9 sigexit.9 \ signal.9 SIGFILLSET.9 \ signal.9 siginit.9 \ signal.9 SIGISEMPTY.9 \ signal.9 SIGISMEMBER.9 \ signal.9 SIGNOTEMPTY.9 \ signal.9 signotify.9 \ signal.9 SIGPENDING.9 \ signal.9 SIGSETAND.9 \ signal.9 SIGSETCANTMASK.9 \ signal.9 SIGSETEQ.9 \ signal.9 SIGSETNAND.9 \ signal.9 SIG_STOPSIGMASK.9 \ signal.9 trapsignal.9 MLINKS+=sleep.9 msleep.9 \ sleep.9 msleep_sbt.9 \ sleep.9 msleep_spin.9 \ sleep.9 msleep_spin_sbt.9 \ sleep.9 pause.9 \ sleep.9 pause_sbt.9 \ sleep.9 tsleep.9 \ sleep.9 tsleep_sbt.9 \ sleep.9 wakeup.9 \ sleep.9 wakeup_one.9 MLINKS+=sleepqueue.9 init_sleepqueues.9 \ sleepqueue.9 sleepq_abort.9 \ sleepqueue.9 sleepq_add.9 \ sleepqueue.9 sleepq_alloc.9 \ sleepqueue.9 sleepq_broadcast.9 \ sleepqueue.9 sleepq_free.9 \ sleepqueue.9 sleepq_lookup.9 \ sleepqueue.9 sleepq_lock.9 \ sleepqueue.9 sleepq_release.9 \ sleepqueue.9 sleepq_remove.9 \ sleepqueue.9 sleepq_set_timeout.9 \ sleepqueue.9 sleepq_set_timeout_sbt.9 \ sleepqueue.9 sleepq_signal.9 \ sleepqueue.9 sleepq_sleepcnt.9 \ sleepqueue.9 sleepq_timedwait.9 \ sleepqueue.9 sleepq_timedwait_sig.9 \ sleepqueue.9 sleepq_type.9 \ sleepqueue.9 sleepq_wait.9 \ sleepqueue.9 sleepq_wait_sig.9 MLINKS+=socket.9 soabort.9 \ socket.9 soaccept.9 \ socket.9 sobind.9 \ socket.9 socheckuid.9 \ socket.9 soclose.9 \ socket.9 soconnect.9 \ socket.9 socreate.9 \ socket.9 sodisconnect.9 \ socket.9 sodupsockaddr.9 \ socket.9 sofree.9 \ socket.9 sogetopt.9 \ socket.9 sohasoutofband.9 \ socket.9 solisten.9 \ socket.9 solisten_proto.9 \ socket.9 solisten_proto_check.9 \ socket.9 sonewconn.9 \ socket.9 sooptcopyin.9 \ socket.9 sooptcopyout.9 \ socket.9 sopoll.9 \ socket.9 sopoll_generic.9 \ socket.9 soreceive.9 \ socket.9 soreceive_dgram.9 \ socket.9 soreceive_generic.9 \ socket.9 soreceive_stream.9 \ socket.9 soreserve.9 \ socket.9 sorflush.9 \ socket.9 sosend.9 \ socket.9 sosend_dgram.9 \ socket.9 sosend_generic.9 \ socket.9 sosetopt.9 \ socket.9 soshutdown.9 \ socket.9 sotoxsocket.9 \ socket.9 soupcall_clear.9 \ socket.9 soupcall_set.9 \ socket.9 sowakeup.9 MLINKS+=stack.9 stack_copy.9 \ stack.9 stack_create.9 \ stack.9 stack_destroy.9 \ stack.9 stack_print.9 \ stack.9 stack_print_ddb.9 \ stack.9 stack_print_short.9 \ stack.9 stack_print_short_ddb.9 \ stack.9 stack_put.9 \ stack.9 stack_save.9 \ stack.9 stack_sbuf_print.9 \ stack.9 stack_sbuf_print_ddb.9 \ stack.9 stack_zero.9 MLINKS+=store.9 subyte.9 \ store.9 suswintr.9 \ store.9 suword.9 \ store.9 suword16.9 \ store.9 suword32.9 \ store.9 suword64.9 MLINKS+=swi.9 swi_add.9 \ swi.9 swi_remove.9 \ swi.9 swi_sched.9 MLINKS+=sx.9 sx_assert.9 \ sx.9 sx_destroy.9 \ sx.9 sx_downgrade.9 \ sx.9 sx_init.9 \ sx.9 sx_init_flags.9 \ sx.9 sx_sleep.9 \ sx.9 sx_slock.9 \ sx.9 sx_slock_sig.9 \ sx.9 sx_sunlock.9 \ sx.9 SX_SYSINIT.9 \ sx.9 sx_try_slock.9 \ sx.9 sx_try_upgrade.9 \ sx.9 sx_try_xlock.9 \ sx.9 sx_unlock.9 \ sx.9 sx_xholder.9 \ sx.9 sx_xlock.9 \ sx.9 sx_xlock_sig.9 \ sx.9 sx_xlocked.9 \ sx.9 sx_xunlock.9 MLINKS+=sysctl.9 SYSCTL_DECL.9 \ sysctl.9 SYSCTL_ADD_INT.9 \ sysctl.9 SYSCTL_ADD_LONG.9 \ sysctl.9 SYSCTL_ADD_NODE.9 \ sysctl.9 SYSCTL_ADD_OPAQUE.9 \ sysctl.9 SYSCTL_ADD_PROC.9 \ sysctl.9 SYSCTL_ADD_QUAD.9 \ sysctl.9 SYSCTL_ADD_ROOT_NODE.9 \ sysctl.9 SYSCTL_ADD_S8.9 \ sysctl.9 SYSCTL_ADD_S16.9 \ sysctl.9 SYSCTL_ADD_S32.9 \ sysctl.9 SYSCTL_ADD_S64.9 \ sysctl.9 SYSCTL_ADD_STRING.9 \ sysctl.9 SYSCTL_ADD_STRUCT.9 \ sysctl.9 SYSCTL_ADD_U8.9 \ sysctl.9 SYSCTL_ADD_U16.9 \ sysctl.9 SYSCTL_ADD_U32.9 \ sysctl.9 SYSCTL_ADD_U64.9 \ sysctl.9 SYSCTL_ADD_UAUTO.9 \ sysctl.9 SYSCTL_ADD_UINT.9 \ sysctl.9 SYSCTL_ADD_ULONG.9 \ sysctl.9 SYSCTL_ADD_UQUAD.9 \ sysctl.9 SYSCTL_CHILDREN.9 \ sysctl.9 SYSCTL_STATIC_CHILDREN.9 \ sysctl.9 SYSCTL_NODE_CHILDREN.9 \ sysctl.9 SYSCTL_PARENT.9 \ sysctl.9 SYSCTL_INT.9 \ sysctl.9 SYSCTL_LONG.9 \ sysctl.9 SYSCTL_NODE.9 \ sysctl.9 SYSCTL_OPAQUE.9 \ sysctl.9 SYSCTL_PROC.9 \ sysctl.9 SYSCTL_QUAD.9 \ sysctl.9 SYSCTL_ROOT_NODE.9 \ sysctl.9 SYSCTL_S8.9 \ sysctl.9 SYSCTL_S16.9 \ sysctl.9 SYSCTL_S32.9 \ sysctl.9 SYSCTL_S64.9 \ sysctl.9 SYSCTL_STRING.9 \ sysctl.9 SYSCTL_STRUCT.9 \ sysctl.9 SYSCTL_U8.9 \ sysctl.9 SYSCTL_U16.9 \ sysctl.9 SYSCTL_U32.9 \ sysctl.9 SYSCTL_U64.9 \ sysctl.9 SYSCTL_UINT.9 \ sysctl.9 SYSCTL_ULONG.9 \ sysctl.9 SYSCTL_UQUAD.9 MLINKS+=sysctl_add_oid.9 sysctl_move_oid.9 \ sysctl_add_oid.9 sysctl_remove_oid.9 \ sysctl_add_oid.9 sysctl_remove_name.9 MLINKS+=sysctl_ctx_init.9 sysctl_ctx_entry_add.9 \ sysctl_ctx_init.9 sysctl_ctx_entry_del.9 \ sysctl_ctx_init.9 sysctl_ctx_entry_find.9 \ sysctl_ctx_init.9 sysctl_ctx_free.9 MLINKS+=SYSINIT.9 SYSUNINIT.9 MLINKS+=taskqueue.9 TASK_INIT.9 \ taskqueue.9 TASK_INITIALIZER.9 \ taskqueue.9 taskqueue_block.9 \ taskqueue.9 taskqueue_cancel.9 \ taskqueue.9 taskqueue_cancel_timeout.9 \ taskqueue.9 taskqueue_create.9 \ taskqueue.9 taskqueue_create_fast.9 \ taskqueue.9 TASKQUEUE_DECLARE.9 \ taskqueue.9 TASKQUEUE_DEFINE.9 \ taskqueue.9 TASKQUEUE_DEFINE_THREAD.9 \ taskqueue.9 taskqueue_drain.9 \ taskqueue.9 taskqueue_drain_all.9 \ taskqueue.9 taskqueue_drain_timeout.9 \ taskqueue.9 taskqueue_enqueue.9 \ taskqueue.9 taskqueue_enqueue_timeout.9 \ taskqueue.9 TASKQUEUE_FAST_DEFINE.9 \ taskqueue.9 TASKQUEUE_FAST_DEFINE_THREAD.9 \ taskqueue.9 taskqueue_free.9 \ taskqueue.9 taskqueue_member.9 \ taskqueue.9 taskqueue_run.9 \ taskqueue.9 taskqueue_set_callback.9 \ taskqueue.9 taskqueue_start_threads.9 \ taskqueue.9 taskqueue_start_threads_pinned.9 \ taskqueue.9 taskqueue_unblock.9 \ taskqueue.9 TIMEOUT_TASK_INIT.9 +MLINKS+=tcp_functions.9 register_tcp_functions.9 \ + tcp_functions.9 deregister_tcp_functions.9 MLINKS+=time.9 boottime.9 \ time.9 time_second.9 \ time.9 time_uptime.9 MLINKS+=timeout.9 callout.9 \ timeout.9 callout_active.9 \ timeout.9 callout_async_drain.9 \ timeout.9 callout_deactivate.9 \ timeout.9 callout_drain.9 \ timeout.9 callout_handle_init.9 \ timeout.9 callout_init.9 \ timeout.9 callout_init_mtx.9 \ timeout.9 callout_init_rm.9 \ timeout.9 callout_init_rw.9 \ timeout.9 callout_pending.9 \ timeout.9 callout_reset.9 \ timeout.9 callout_reset_curcpu.9 \ timeout.9 callout_reset_on.9 \ timeout.9 callout_reset_sbt.9 \ timeout.9 callout_reset_sbt_curcpu.9 \ timeout.9 callout_reset_sbt_on.9 \ timeout.9 callout_schedule.9 \ timeout.9 callout_schedule_curcpu.9 \ timeout.9 callout_schedule_on.9 \ timeout.9 callout_schedule_sbt.9 \ timeout.9 callout_schedule_sbt_curcpu.9 \ timeout.9 callout_schedule_sbt_on.9 \ timeout.9 callout_stop.9 \ timeout.9 untimeout.9 MLINKS+=ucred.9 cred_update_thread.9 \ ucred.9 crcopy.9 \ ucred.9 crcopysafe.9 \ ucred.9 crdup.9 \ ucred.9 crfree.9 \ ucred.9 crget.9 \ ucred.9 crhold.9 \ ucred.9 crsetgroups.9 \ ucred.9 crshared.9 \ ucred.9 cru2x.9 MLINKS+=uidinfo.9 uifind.9 \ uidinfo.9 uifree.9 \ uidinfo.9 uihashinit.9 \ uidinfo.9 uihold.9 MLINKS+=uio.9 uiomove.9 \ uio.9 uiomove_nofault.9 .if ${MK_USB} != "no" MAN+= usbdi.9 MLINKS+=usbdi.9 usbd_do_request.9 \ usbdi.9 usbd_do_request_flags.9 \ usbdi.9 usbd_errstr.9 \ usbdi.9 usbd_lookup_id_by_info.9 \ usbdi.9 usbd_lookup_id_by_uaa.9 \ usbdi.9 usbd_transfer_clear_stall.9 \ usbdi.9 usbd_transfer_drain.9 \ usbdi.9 usbd_transfer_pending.9 \ usbdi.9 usbd_transfer_poll.9 \ usbdi.9 usbd_transfer_setup.9 \ usbdi.9 usbd_transfer_start.9 \ usbdi.9 usbd_transfer_stop.9 \ usbdi.9 usbd_transfer_submit.9 \ usbdi.9 usbd_transfer_unsetup.9 \ usbdi.9 usbd_xfer_clr_flag.9 \ usbdi.9 usbd_xfer_frame_data.9 \ usbdi.9 usbd_xfer_frame_len.9 \ usbdi.9 usbd_xfer_get_frame.9 \ usbdi.9 usbd_xfer_get_priv.9 \ usbdi.9 usbd_xfer_is_stalled.9 \ usbdi.9 usbd_xfer_max_framelen.9 \ usbdi.9 usbd_xfer_max_frames.9 \ usbdi.9 usbd_xfer_max_len.9 \ usbdi.9 usbd_xfer_set_flag.9 \ usbdi.9 usbd_xfer_set_frame_data.9 \ usbdi.9 usbd_xfer_set_frame_len.9 \ usbdi.9 usbd_xfer_set_frame_offset.9 \ usbdi.9 usbd_xfer_set_frames.9 \ usbdi.9 usbd_xfer_set_interval.9 \ usbdi.9 usbd_xfer_set_priv.9 \ usbdi.9 usbd_xfer_set_stall.9 \ usbdi.9 usbd_xfer_set_timeout.9 \ usbdi.9 usbd_xfer_softc.9 \ usbdi.9 usbd_xfer_state.9 \ usbdi.9 usbd_xfer_status.9 \ usbdi.9 usb_fifo_alloc_buffer.9 \ usbdi.9 usb_fifo_attach.9 \ usbdi.9 usb_fifo_detach.9 \ usbdi.9 usb_fifo_free_buffer.9 \ usbdi.9 usb_fifo_get_data.9 \ usbdi.9 usb_fifo_get_data_buffer.9 \ usbdi.9 usb_fifo_get_data_error.9 \ usbdi.9 usb_fifo_get_data_linear.9 \ usbdi.9 usb_fifo_put_bytes_max.9 \ usbdi.9 usb_fifo_put_data.9 \ usbdi.9 usb_fifo_put_data_buffer.9 \ usbdi.9 usb_fifo_put_data_error.9 \ usbdi.9 usb_fifo_put_data_linear.9 \ usbdi.9 usb_fifo_reset.9 \ usbdi.9 usb_fifo_softc.9 \ usbdi.9 usb_fifo_wakeup.9 .endif MLINKS+=vcount.9 count_dev.9 MLINKS+=vfsconf.9 vfs_modevent.9 \ vfsconf.9 vfs_register.9 \ vfsconf.9 vfs_unregister.9 MLINKS+=vfs_getopt.9 vfs_copyopt.9 \ vfs_getopt.9 vfs_filteropt.9 \ vfs_getopt.9 vfs_flagopt.9 \ vfs_getopt.9 vfs_getopts.9 \ vfs_getopt.9 vfs_scanopt.9 \ vfs_getopt.9 vfs_setopt.9 \ vfs_getopt.9 vfs_setopt_part.9 \ vfs_getopt.9 vfs_setopts.9 MLINKS+=vhold.9 vdrop.9 \ vhold.9 vdropl.9 \ vhold.9 vholdl.9 MLINKS+=vmem.9 vmem_add.9 \ vmem.9 vmem_alloc.9 \ vmem.9 vmem_create.9 \ vmem.9 vmem_destroy.9 \ vmem.9 vmem_free.9 \ vmem.9 vmem_xalloc.9 \ vmem.9 vmem_xfree.9 MLINKS+=vm_map_lock.9 vm_map_lock_downgrade.9 \ vm_map_lock.9 vm_map_lock_read.9 \ vm_map_lock.9 vm_map_lock_upgrade.9 \ vm_map_lock.9 vm_map_trylock.9 \ vm_map_lock.9 vm_map_trylock_read.9 \ vm_map_lock.9 vm_map_unlock.9 \ vm_map_lock.9 vm_map_unlock_read.9 MLINKS+=vm_map_lookup.9 vm_map_lookup_done.9 MLINKS+=vm_map_max.9 vm_map_min.9 \ vm_map_max.9 vm_map_pmap.9 MLINKS+=vm_map_stack.9 vm_map_growstack.9 MLINKS+=vm_map_wire.9 vm_map_unwire.9 MLINKS+=vm_page_bits.9 vm_page_clear_dirty.9 \ vm_page_bits.9 vm_page_dirty.9 \ vm_page_bits.9 vm_page_is_valid.9 \ vm_page_bits.9 vm_page_set_invalid.9 \ vm_page_bits.9 vm_page_set_validclean.9 \ vm_page_bits.9 vm_page_test_dirty.9 \ vm_page_bits.9 vm_page_undirty.9 \ vm_page_bits.9 vm_page_zero_invalid.9 MLINKS+=vm_page_busy.9 vm_page_busied.9 \ vm_page_busy.9 vm_page_busy_downgrade.9 \ vm_page_busy.9 vm_page_busy_sleep.9 \ vm_page_busy.9 vm_page_sbusied.9 \ vm_page_busy.9 vm_page_sbusy.9 \ vm_page_busy.9 vm_page_sleep_if_busy.9 \ vm_page_busy.9 vm_page_sunbusy.9 \ vm_page_busy.9 vm_page_trysbusy.9 \ vm_page_busy.9 vm_page_tryxbusy.9 \ vm_page_busy.9 vm_page_xbusied.9 \ vm_page_busy.9 vm_page_xbusy.9 \ vm_page_busy.9 vm_page_xunbusy.9 \ vm_page_busy.9 vm_page_assert_sbusied.9 \ vm_page_busy.9 vm_page_assert_unbusied.9 \ vm_page_busy.9 vm_page_assert_xbusied.9 MLINKS+=vm_page_aflag.9 vm_page_aflag_clear.9 \ vm_page_aflag.9 vm_page_aflag_set.9 \ vm_page_aflag.9 vm_page_reference.9 MLINKS+=vm_page_free.9 vm_page_free_toq.9 \ vm_page_free.9 vm_page_free_zero.9 \ vm_page_free.9 vm_page_try_to_free.9 MLINKS+=vm_page_hold.9 vm_page_unhold.9 MLINKS+=vm_page_insert.9 vm_page_remove.9 MLINKS+=vm_page_wire.9 vm_page_unwire.9 MLINKS+=VOP_ACCESS.9 VOP_ACCESSX.9 MLINKS+=VOP_ATTRIB.9 VOP_GETATTR.9 \ VOP_ATTRIB.9 VOP_SETATTR.9 MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \ VOP_CREATE.9 VOP_MKNOD.9 \ VOP_CREATE.9 VOP_SYMLINK.9 MLINKS+=VOP_GETPAGES.9 VOP_PUTPAGES.9 MLINKS+=VOP_INACTIVE.9 VOP_RECLAIM.9 MLINKS+=VOP_LOCK.9 vn_lock.9 \ VOP_LOCK.9 VOP_ISLOCKED.9 \ VOP_LOCK.9 VOP_UNLOCK.9 MLINKS+=VOP_OPENCLOSE.9 VOP_CLOSE.9 \ VOP_OPENCLOSE.9 VOP_OPEN.9 MLINKS+=VOP_RDWR.9 VOP_READ.9 \ VOP_RDWR.9 VOP_WRITE.9 MLINKS+=VOP_REMOVE.9 VOP_RMDIR.9 MLINKS+=vnet.9 vimage.9 MLINKS+=vref.9 VREF.9 MLINKS+=vrele.9 vput.9 \ vrele.9 vunref.9 MLINKS+=vslock.9 vsunlock.9 MLINKS+=zone.9 uma.9 \ zone.9 uma_find_refcnt.9 \ zone.9 uma_zalloc.9 \ zone.9 uma_zalloc_arg.9 \ zone.9 uma_zcreate.9 \ zone.9 uma_zdestroy.9 \ zone.9 uma_zfree.9 \ zone.9 uma_zfree_arg.9 \ zone.9 uma_zone_get_cur.9 \ zone.9 uma_zone_get_max.9 \ zone.9 uma_zone_set_max.9 \ zone.9 uma_zone_set_warning.9 \ zone.9 uma_zone_set_maxaction.9 .include Index: projects/vnet/share/man/man9/tcp_functions.9 =================================================================== --- projects/vnet/share/man/man9/tcp_functions.9 (nonexistent) +++ projects/vnet/share/man/man9/tcp_functions.9 (revision 302277) @@ -0,0 +1,285 @@ +.\" +.\" Copyright (c) 2016 Jonathan Looney +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR +.\" ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd June 28, 2016 +.Dt TCP_FUNCTIONS 9 +.Os +.Sh NAME +.Nm tcp_functions +.Nd Alternate TCP Stack Framework +.Sh SYNOPSIS +.In netinet/tcp.h +.In netinet/tcp_var.h +.Ft int +.Fn register_tcp_functions "struct tcp_function_block *blk" "int wait" +.Ft int +.Fn deregister_tcp_functions "struct tcp_function_block *blk" +.Sh DESCRIPTION +The +.Nm +framework allows a kernel developer to implement alternate TCP stacks. +The alternate stacks can be compiled in the kernel or can be implemented in +loadable kernel modules. +This functionality is intended to encourage experimentation with the TCP stack +and to allow alternate behaviors to be deployed for different TCP connections +on a single system. +.Pp +A system administrator can set a system default stack. +By default, all TCP connections will use the system default stack. +Additionally, users can specify a particular stack to use on a per-connection +basis. +(See +.Xr tcp 4 +for details on setting the system default stack, or selecting a specific stack +for a given connection.) +.Pp +This man page treats "TCP stacks" as synonymous with "function blocks". +This is intentional. +A "TCP stack" is a collection of functions that implement a set of behavior. +Therefore, an alternate "function block" defines an alternate "TCP stack". +.Pp +.Nm +modules must call the +.Fn register_tcp_functions +function during initialization and successfully call the +.Fn deregister_tcp_functions +function prior to allowing the module to be unloaded. +.Pp +The +.Fn register_tcp_functions +function requests that the system add a specified function block to the system. +.Pp +The +.Fn deregister_tcp_functions +function requests that the system remove a specified function block from the +system. +If the call fails because sockets are still using the specified function block, +the system will mark the function block as being in the process of being +removed. +This will prevent additional sockets from using the specified function block. +However, it will not impact sockets that are already using the function block. +.Pp +The +.Fa blk +argument is a pointer to a +.Vt "struct tcp_function_block" , +which is explained below (see +.Sx Function Block Structure ) . +The +.Fa wait +argument is used as the +.Fa flags +argument to +.Xr malloc 9 , +and must be set to one of the valid values defined in that man page. +.Ss Function Block Structure +The +.Fa blk argument is a pointer to a +.Vt "struct tcp_function_block" , +which has the following members: +.Bd -literal -offset indent +struct tcp_function_block { + char tfb_tcp_block_name[TCP_FUNCTION_NAME_LEN_MAX]; + int (*tfb_tcp_output)(struct tcpcb *); + void (*tfb_tcp_do_segment)(struct mbuf *, struct tcphdr *, + struct socket *, struct tcpcb *, + int, int, uint8_t, + int); + int (*tfb_tcp_ctloutput)(struct socket *so, + struct sockopt *sopt, + struct inpcb *inp, struct tcpcb *tp); + /* Optional memory allocation/free routine */ + void (*tfb_tcp_fb_init)(struct tcpcb *); + void (*tfb_tcp_fb_fini)(struct tcpcb *); + /* Optional timers, must define all if you define one */ + int (*tfb_tcp_timer_stop_all)(struct tcpcb *); + void (*tfb_tcp_timer_activate)(struct tcpcb *, + uint32_t, u_int); + int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t); + void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t); + void (*tfb_tcp_rexmit_tmr)(struct tcpcb *); + volatile uint32_t tfb_refcnt; + uint32_t tfb_flags; +}; +.Ed +.Pp +The +.Va tfb_tcp_block_name +field identifies the unique name of the TCP stack, and should be no longer than +TCP_FUNCTION_NAME_LEN_MAX-1 characters in length. +.Pp +The +.Va tfb_tcp_output , +.Va tfb_tcp_do_segment , +and +.Va tfb_tcp_ctloutput +fields are pointers to functions that perform the equivalent actions +as the default +.Fn tcp_output , +.Fn tcp_do_segment , +and +.Fn tcp_default_ctloutput +functions, respectively. +Each of these function pointers must be non-NULL. +.Pp +If a TCP stack needs to initialize data when a socket first selects the TCP +stack (or, when the socket is first opened), it should set a non-NULL +pointer in the +.Va tfb_tcp_fb_init +field. +Likewise, if a TCP stack needs to cleanup data when a socket stops using the +TCP stack (or, when the socket is closed), it should set a non-NULL pointer +in the +.Va tfb_tcp_fb_fini +field. +.Pp +If the TCP stack implements additional timers, the TCP stack should set a +non-NULL pointer in the +.Va tfb_tcp_timer_stop_all , +.Va tfb_tcp_timer_activate , +.Va tfb_tcp_timer_active , +and +.Va tfb_tcp_timer_stop +fields. +These fields should all be +.Dv NULL +or should all contain pointers to functions. +The +.Va tfb_tcp_timer_activate , +.Va tfb_tcp_timer_active , +and +.Va tfb_tcp_timer_stop +functions will be called when the +.Fn tcp_timer_activate , +.Fn tcp_timer_active , +and +.Fn tcp_timer_stop +functions, respectively, are called with a timer type other than the standard +types. +The functions defined by the TCP stack have the same semantics (both for +arguments and return values) as the normal timer functions they supplement. +.Pp +Additionally, a stack may define its own actions to take when the retransmit +timer fires by setting a non-NULL function pointer in the +.Va tfb_tcp_rexmit_tmr +field. +This function is called very early in the process of handling a retransmit +timer. +However, care must be taken to ensure the retransmit timer leaves the +TCP control block in a valid state for the remainder of the retransmit +timer logic. +.Pp +The +.Va tfb_refcnt +and +.Va tfb_flags +fields are used by the kernel's TCP code and will be initialized when the +TCP stack is registered. +.Ss Requirements for Alternate TCP Stacks +If the TCP stack needs to store data beyond what is stored in the default +TCP control block, the TCP stack can initialize its own per-connection storage. +The +.Va t_fb_ptr +field in the +.Vt "struct tcpcb" +control block structure has been reserved to hold a pointer to this +per-connection storage. +If the TCP stack uses this alternate storage, it should understand that the +value of the +.Va t_fb_ptr +pointer may not be initialized to +.Dv NULL . +Therefore, it should use a +.Va tfb_tcp_fb_init +function to initialize this field. +Additionally, it should use a +.Va tfb_tcp_fb_fini +function to deallocate storage when the socket is closed. +.Pp +It is understood that alternate TCP stacks may keep different sets of data. +However, in order to ensure that data is available to both the user and the +rest of the system in a standardized format, alternate TCP stacks must +update all fields in the TCP control block to the greatest extent practical. +.Sh RETURN VALUES +The +.Fn register_tcp_functions +and +.Fn deregister_tcp_functions +functions return zero on success and non-zero on failure. +In particular, the +.Fn deregister_tcp_functions +will return +.Er EBUSY +until no more connections are using the specified TCP stack. +A module calling +.Fn deregister_tcp_functions +must be prepared to wait until all connections have stopped using the +specified TCP stack. +.Sh ERRORS +The +.Fn register_tcp_functions +function will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +Any of the members of the +.Fa blk +argument are set incorrectly. +.It Bq Er ENOMEM +The function could not allocate memory for its internal data. +.It Bq Er EALREADY +A function block is already registered with the same name. +.El +The +.Fn deregister_tcp_functions +function will fail if: +.Bl -tag -width Er +.It Bq Er EPERM +The +.Fa blk +argument references the kernel's compiled-in default function block. +.It Bq Er EBUSY +The function block is still in use by one or more sockets, or is defined as +the current default function block. +.It Bq Er ENOENT +The +.Fa blk +argument references a function block that is not currently registered. +.Sh SEE ALSO +.Xr malloc 9 , +.Xr tcp 4 +.Sh HISTORY +This framework first appeared in +.Fx 11.0 . +.Sh AUTHORS +.An -nosplit +The +.Nm +framework was written by +.An Randall Stewart Aq Mt rrs@FreeBSD.org . +.Pp +This manual page was written by +.An Jonathan Looney Aq Mt jtl@FreeBSD.org . Property changes on: projects/vnet/share/man/man9/tcp_functions.9 ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/vnet/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =================================================================== --- projects/vnet/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (revision 302276) +++ projects/vnet/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (revision 302277) @@ -1,7164 +1,7233 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2011, 2015 by Delphix. All rights reserved. * Copyright (c) 2014 by Saso Kiselkov. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. */ /* * DVA-based Adjustable Replacement Cache * * While much of the theory of operation used here is * based on the self-tuning, low overhead replacement cache * presented by Megiddo and Modha at FAST 2003, there are some * significant differences: * * 1. The Megiddo and Modha model assumes any page is evictable. * Pages in its cache cannot be "locked" into memory. This makes * the eviction algorithm simple: evict the last page in the list. * This also make the performance characteristics easy to reason * about. Our cache is not so simple. At any given moment, some * subset of the blocks in the cache are un-evictable because we * have handed out a reference to them. Blocks are only evictable * when there are no external references active. This makes * eviction far more problematic: we choose to evict the evictable * blocks that are the "lowest" in the list. * * There are times when it is not possible to evict the requested * space. In these circumstances we are unable to adjust the cache * size. To prevent the cache growing unbounded at these times we * implement a "cache throttle" that slows the flow of new data * into the cache until we can make space available. * * 2. The Megiddo and Modha model assumes a fixed cache size. * Pages are evicted when the cache is full and there is a cache * miss. Our model has a variable sized cache. It grows with * high use, but also tries to react to memory pressure from the * operating system: decreasing its size when system memory is * tight. * * 3. The Megiddo and Modha model assumes a fixed page size. All * elements of the cache are therefore exactly the same size. So * when adjusting the cache size following a cache miss, its simply * a matter of choosing a single page to evict. In our model, we * have variable sized cache blocks (rangeing from 512 bytes to * 128K bytes). We therefore choose a set of blocks to evict to make * space for a cache miss that approximates as closely as possible * the space used by the new block. * * See also: "ARC: A Self-Tuning, Low Overhead Replacement Cache" * by N. Megiddo & D. Modha, FAST 2003 */ /* * The locking model: * * A new reference to a cache buffer can be obtained in two * ways: 1) via a hash table lookup using the DVA as a key, * or 2) via one of the ARC lists. The arc_read() interface * uses method 1, while the internal arc algorithms for * adjusting the cache use method 2. We therefore provide two * types of locks: 1) the hash table lock array, and 2) the * arc list locks. * * Buffers do not have their own mutexes, rather they rely on the * hash table mutexes for the bulk of their protection (i.e. most * fields in the arc_buf_hdr_t are protected by these mutexes). * * buf_hash_find() returns the appropriate mutex (held) when it * locates the requested buffer in the hash table. It returns * NULL for the mutex if the buffer was not in the table. * * buf_hash_remove() expects the appropriate hash mutex to be * already held before it is invoked. * * Each arc state also has a mutex which is used to protect the * buffer list associated with the state. When attempting to * obtain a hash table lock while holding an arc list lock you * must use: mutex_tryenter() to avoid deadlock. Also note that * the active state mutex must be held before the ghost state mutex. * * Arc buffers may have an associated eviction callback function. * This function will be invoked prior to removing the buffer (e.g. * in arc_do_user_evicts()). Note however that the data associated * with the buffer may be evicted prior to the callback. The callback * must be made with *no locks held* (to prevent deadlock). Additionally, * the users of callbacks must ensure that their private data is * protected from simultaneous callbacks from arc_clear_callback() * and arc_do_user_evicts(). * * Note that the majority of the performance stats are manipulated * with atomic operations. * * The L2ARC uses the l2ad_mtx on each vdev for the following: * * - L2ARC buflist creation * - L2ARC buflist eviction * - L2ARC write completion, which walks L2ARC buflists * - ARC header destruction, as it removes from L2ARC buflists * - ARC header release, as it removes from L2ARC buflists */ #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #include #endif #include #include #include #include #include #include #ifdef illumos #ifndef _KERNEL /* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */ boolean_t arc_watch = B_FALSE; int arc_procfd; #endif #endif /* illumos */ static kmutex_t arc_reclaim_lock; static kcondvar_t arc_reclaim_thread_cv; static boolean_t arc_reclaim_thread_exit; static kcondvar_t arc_reclaim_waiters_cv; static kmutex_t arc_user_evicts_lock; static kcondvar_t arc_user_evicts_cv; static boolean_t arc_user_evicts_thread_exit; static kmutex_t arc_dnlc_evicts_lock; static kcondvar_t arc_dnlc_evicts_cv; static boolean_t arc_dnlc_evicts_thread_exit; uint_t arc_reduce_dnlc_percent = 3; /* * The number of headers to evict in arc_evict_state_impl() before * dropping the sublist lock and evicting from another sublist. A lower * value means we're more likely to evict the "correct" header (i.e. the * oldest header in the arc state), but comes with higher overhead * (i.e. more invocations of arc_evict_state_impl()). */ int zfs_arc_evict_batch_limit = 10; /* * The number of sublists used for each of the arc state lists. If this * is not set to a suitable value by the user, it will be configured to * the number of CPUs on the system in arc_init(). */ int zfs_arc_num_sublists_per_state = 0; /* number of seconds before growing cache again */ static int arc_grow_retry = 60; /* shift of arc_c for calculating overflow limit in arc_get_data_buf */ int zfs_arc_overflow_shift = 8; /* shift of arc_c for calculating both min and max arc_p */ static int arc_p_min_shift = 4; /* log2(fraction of arc to reclaim) */ static int arc_shrink_shift = 7; /* * log2(fraction of ARC which must be free to allow growing). * I.e. If there is less than arc_c >> arc_no_grow_shift free memory, * when reading a new block into the ARC, we will evict an equal-sized block * from the ARC. * * This must be less than arc_shrink_shift, so that when we shrink the ARC, * we will still not allow it to grow. */ int arc_no_grow_shift = 5; /* * minimum lifespan of a prefetch block in clock ticks * (initialized in arc_init()) */ static int arc_min_prefetch_lifespan; /* * If this percent of memory is free, don't throttle. */ int arc_lotsfree_percent = 10; static int arc_dead; extern boolean_t zfs_prefetch_disable; /* * The arc has filled available memory and has now warmed up. */ static boolean_t arc_warm; /* * These tunables are for performance analysis. */ uint64_t zfs_arc_max; uint64_t zfs_arc_min; uint64_t zfs_arc_meta_limit = 0; uint64_t zfs_arc_meta_min = 0; int zfs_arc_grow_retry = 0; int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; int zfs_disable_dup_eviction = 0; uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ u_int zfs_arc_free_target = 0; +/* Absolute min for arc min / max is 16MB. */ +static uint64_t arc_abs_min = 16 << 20; + static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS); +static int sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS); +static int sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS); -#ifdef _KERNEL +#if defined(__FreeBSD__) && defined(_KERNEL) static void arc_free_target_init(void *unused __unused) { zfs_arc_free_target = vm_pageout_wakeup_thresh; } SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, arc_free_target_init, NULL); TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); TUNABLE_QUAD("vfs.zfs.arc_meta_min", &zfs_arc_meta_min); TUNABLE_INT("vfs.zfs.arc_shrink_shift", &zfs_arc_shrink_shift); SYSCTL_DECL(_vfs_zfs); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, - "Maximum ARC size"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_RDTUN, &zfs_arc_min, 0, - "Minimum ARC size"); +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, CTLTYPE_U64 | CTLFLAG_RWTUN, + 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_max, "QU", "Maximum ARC size"); +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, CTLTYPE_U64 | CTLFLAG_RWTUN, + 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_min, "QU", "Minimum ARC size"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, &zfs_arc_average_blocksize, 0, "ARC average blocksize"); SYSCTL_INT(_vfs_zfs, OID_AUTO, arc_shrink_shift, CTLFLAG_RW, &arc_shrink_shift, 0, "log2(fraction of arc to reclaim)"); /* * We don't have a tunable for arc_free_target due to the dependency on * pagedaemon initialisation. */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int), sysctl_vfs_zfs_arc_free_target, "IU", "Desired number of free pages below which ARC triggers reclaim"); static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS) { u_int val; int err; val = zfs_arc_free_target; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < minfree) return (EINVAL); if (val > vm_cnt.v_page_count) return (EINVAL); zfs_arc_free_target = val; return (0); } /* * Must be declared here, before the definition of corresponding kstat * macro which uses the same names will confuse the compiler. */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_meta_limit, CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_meta_limit, "QU", "ARC metadata limit"); #endif /* * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) * ARC_mru - recently used, currently cached * ARC_mru_ghost - recentely used, no longer in cache * ARC_mfu - frequently used, currently cached * ARC_mfu_ghost - frequently used, no longer in cache * ARC_l2c_only - exists in L2ARC but not other states * When there are no active references to the buffer, they are * are linked onto a list in one of these arc states. These are * the only buffers that can be evicted or deleted. Within each * state there are multiple lists, one for meta-data and one for * non-meta-data. Meta-data (indirect blocks, blocks of dnodes, * etc.) is tracked separately so that it can be managed more * explicitly: favored over data, limited explicitly. * * Anonymous buffers are buffers that are not associated with * a DVA. These are buffers that hold dirty block copies * before they are written to stable storage. By definition, * they are "ref'd" and are considered part of arc_mru * that cannot be freed. Generally, they will aquire a DVA * as they are written and migrate onto the arc_mru list. * * The ARC_l2c_only state is for buffers that are in the second * level ARC but no longer in any of the ARC_m* lists. The second * level ARC itself may also contain buffers that are in any of * the ARC_m* states - meaning that a buffer can exist in two * places. The reason for the ARC_l2c_only state is to keep the * buffer header in the hash table, so that reads that hit the * second level ARC benefit from these fast lookups. */ typedef struct arc_state { /* * list of evictable buffers */ multilist_t arcs_list[ARC_BUFC_NUMTYPES]; /* * total amount of evictable data in this state */ uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* * total amount of data in this state; this includes: evictable, * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA. */ refcount_t arcs_size; } arc_state_t; /* The 6 states: */ static arc_state_t ARC_anon; static arc_state_t ARC_mru; static arc_state_t ARC_mru_ghost; static arc_state_t ARC_mfu; static arc_state_t ARC_mfu_ghost; static arc_state_t ARC_l2c_only; typedef struct arc_stats { kstat_named_t arcstat_hits; kstat_named_t arcstat_misses; kstat_named_t arcstat_demand_data_hits; kstat_named_t arcstat_demand_data_misses; kstat_named_t arcstat_demand_metadata_hits; kstat_named_t arcstat_demand_metadata_misses; kstat_named_t arcstat_prefetch_data_hits; kstat_named_t arcstat_prefetch_data_misses; kstat_named_t arcstat_prefetch_metadata_hits; kstat_named_t arcstat_prefetch_metadata_misses; kstat_named_t arcstat_mru_hits; kstat_named_t arcstat_mru_ghost_hits; kstat_named_t arcstat_mfu_hits; kstat_named_t arcstat_mfu_ghost_hits; kstat_named_t arcstat_allocated; kstat_named_t arcstat_deleted; /* * Number of buffers that could not be evicted because the hash lock * was held by another thread. The lock may not necessarily be held * by something using the same buffer, since hash locks are shared * by multiple buffers. */ kstat_named_t arcstat_mutex_miss; /* * Number of buffers skipped because they have I/O in progress, are * indrect prefetch buffers that have not lived long enough, or are * not from the spa we're trying to evict from. */ kstat_named_t arcstat_evict_skip; /* * Number of times arc_evict_state() was unable to evict enough * buffers to reach it's target amount. */ kstat_named_t arcstat_evict_not_enough; kstat_named_t arcstat_evict_l2_cached; kstat_named_t arcstat_evict_l2_eligible; kstat_named_t arcstat_evict_l2_ineligible; kstat_named_t arcstat_evict_l2_skip; kstat_named_t arcstat_hash_elements; kstat_named_t arcstat_hash_elements_max; kstat_named_t arcstat_hash_collisions; kstat_named_t arcstat_hash_chains; kstat_named_t arcstat_hash_chain_max; kstat_named_t arcstat_p; kstat_named_t arcstat_c; kstat_named_t arcstat_c_min; kstat_named_t arcstat_c_max; kstat_named_t arcstat_size; /* * Number of bytes consumed by internal ARC structures necessary * for tracking purposes; these structures are not actually * backed by ARC buffers. This includes arc_buf_hdr_t structures * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only * caches), and arc_buf_t structures (allocated via arc_buf_t * cache). */ kstat_named_t arcstat_hdr_size; /* * Number of bytes consumed by ARC buffers of type equal to * ARC_BUFC_DATA. This is generally consumed by buffers backing * on disk user data (e.g. plain file contents). */ kstat_named_t arcstat_data_size; /* * Number of bytes consumed by ARC buffers of type equal to * ARC_BUFC_METADATA. This is generally consumed by buffers * backing on disk data that is used for internal ZFS * structures (e.g. ZAP, dnode, indirect blocks, etc). */ kstat_named_t arcstat_metadata_size; /* * Number of bytes consumed by various buffers and structures * not actually backed with ARC buffers. This includes bonus * buffers (allocated directly via zio_buf_* functions), * dmu_buf_impl_t structures (allocated via dmu_buf_impl_t * cache), and dnode_t structures (allocated via dnode_t cache). */ kstat_named_t arcstat_other_size; /* * Total number of bytes consumed by ARC buffers residing in the * arc_anon state. This includes *all* buffers in the arc_anon * state; e.g. data, metadata, evictable, and unevictable buffers * are all included in this value. */ kstat_named_t arcstat_anon_size; /* * Number of bytes consumed by ARC buffers that meet the * following criteria: backing buffers of type ARC_BUFC_DATA, * residing in the arc_anon state, and are eligible for eviction * (e.g. have no outstanding holds on the buffer). */ kstat_named_t arcstat_anon_evictable_data; /* * Number of bytes consumed by ARC buffers that meet the * following criteria: backing buffers of type ARC_BUFC_METADATA, * residing in the arc_anon state, and are eligible for eviction * (e.g. have no outstanding holds on the buffer). */ kstat_named_t arcstat_anon_evictable_metadata; /* * Total number of bytes consumed by ARC buffers residing in the * arc_mru state. This includes *all* buffers in the arc_mru * state; e.g. data, metadata, evictable, and unevictable buffers * are all included in this value. */ kstat_named_t arcstat_mru_size; /* * Number of bytes consumed by ARC buffers that meet the * following criteria: backing buffers of type ARC_BUFC_DATA, * residing in the arc_mru state, and are eligible for eviction * (e.g. have no outstanding holds on the buffer). */ kstat_named_t arcstat_mru_evictable_data; /* * Number of bytes consumed by ARC buffers that meet the * following criteria: backing buffers of type ARC_BUFC_METADATA, * residing in the arc_mru state, and are eligible for eviction * (e.g. have no outstanding holds on the buffer). */ kstat_named_t arcstat_mru_evictable_metadata; /* * Total number of bytes that *would have been* consumed by ARC * buffers in the arc_mru_ghost state. The key thing to note * here, is the fact that this size doesn't actually indicate * RAM consumption. The ghost lists only consist of headers and * don't actually have ARC buffers linked off of these headers. * Thus, *if* the headers had associated ARC buffers, these * buffers *would have* consumed this number of bytes. */ kstat_named_t arcstat_mru_ghost_size; /* * Number of bytes that *would have been* consumed by ARC * buffers that are eligible for eviction, of type * ARC_BUFC_DATA, and linked off the arc_mru_ghost state. */ kstat_named_t arcstat_mru_ghost_evictable_data; /* * Number of bytes that *would have been* consumed by ARC * buffers that are eligible for eviction, of type * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. */ kstat_named_t arcstat_mru_ghost_evictable_metadata; /* * Total number of bytes consumed by ARC buffers residing in the * arc_mfu state. This includes *all* buffers in the arc_mfu * state; e.g. data, metadata, evictable, and unevictable buffers * are all included in this value. */ kstat_named_t arcstat_mfu_size; /* * Number of bytes consumed by ARC buffers that are eligible for * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu * state. */ kstat_named_t arcstat_mfu_evictable_data; /* * Number of bytes consumed by ARC buffers that are eligible for * eviction, of type ARC_BUFC_METADATA, and reside in the * arc_mfu state. */ kstat_named_t arcstat_mfu_evictable_metadata; /* * Total number of bytes that *would have been* consumed by ARC * buffers in the arc_mfu_ghost state. See the comment above * arcstat_mru_ghost_size for more details. */ kstat_named_t arcstat_mfu_ghost_size; /* * Number of bytes that *would have been* consumed by ARC * buffers that are eligible for eviction, of type * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state. */ kstat_named_t arcstat_mfu_ghost_evictable_data; /* * Number of bytes that *would have been* consumed by ARC * buffers that are eligible for eviction, of type * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state. */ kstat_named_t arcstat_mfu_ghost_evictable_metadata; kstat_named_t arcstat_l2_hits; kstat_named_t arcstat_l2_misses; kstat_named_t arcstat_l2_feeds; kstat_named_t arcstat_l2_rw_clash; kstat_named_t arcstat_l2_read_bytes; kstat_named_t arcstat_l2_write_bytes; kstat_named_t arcstat_l2_writes_sent; kstat_named_t arcstat_l2_writes_done; kstat_named_t arcstat_l2_writes_error; kstat_named_t arcstat_l2_writes_lock_retry; kstat_named_t arcstat_l2_evict_lock_retry; kstat_named_t arcstat_l2_evict_reading; kstat_named_t arcstat_l2_evict_l1cached; kstat_named_t arcstat_l2_free_on_write; kstat_named_t arcstat_l2_cdata_free_on_write; kstat_named_t arcstat_l2_abort_lowmem; kstat_named_t arcstat_l2_cksum_bad; kstat_named_t arcstat_l2_io_error; kstat_named_t arcstat_l2_size; kstat_named_t arcstat_l2_asize; kstat_named_t arcstat_l2_hdr_size; kstat_named_t arcstat_l2_compress_successes; kstat_named_t arcstat_l2_compress_zeros; kstat_named_t arcstat_l2_compress_failures; kstat_named_t arcstat_l2_padding_needed; kstat_named_t arcstat_l2_write_trylock_fail; kstat_named_t arcstat_l2_write_passed_headroom; kstat_named_t arcstat_l2_write_spa_mismatch; kstat_named_t arcstat_l2_write_in_l2; kstat_named_t arcstat_l2_write_hdr_io_in_progress; kstat_named_t arcstat_l2_write_not_cacheable; kstat_named_t arcstat_l2_write_full; kstat_named_t arcstat_l2_write_buffer_iter; kstat_named_t arcstat_l2_write_pios; kstat_named_t arcstat_l2_write_buffer_bytes_scanned; kstat_named_t arcstat_l2_write_buffer_list_iter; kstat_named_t arcstat_l2_write_buffer_list_null_iter; kstat_named_t arcstat_memory_throttle_count; kstat_named_t arcstat_duplicate_buffers; kstat_named_t arcstat_duplicate_buffers_size; kstat_named_t arcstat_duplicate_reads; kstat_named_t arcstat_meta_used; kstat_named_t arcstat_meta_limit; kstat_named_t arcstat_meta_max; kstat_named_t arcstat_meta_min; kstat_named_t arcstat_sync_wait_for_async; kstat_named_t arcstat_demand_hit_predictive_prefetch; } arc_stats_t; static arc_stats_t arc_stats = { { "hits", KSTAT_DATA_UINT64 }, { "misses", KSTAT_DATA_UINT64 }, { "demand_data_hits", KSTAT_DATA_UINT64 }, { "demand_data_misses", KSTAT_DATA_UINT64 }, { "demand_metadata_hits", KSTAT_DATA_UINT64 }, { "demand_metadata_misses", KSTAT_DATA_UINT64 }, { "prefetch_data_hits", KSTAT_DATA_UINT64 }, { "prefetch_data_misses", KSTAT_DATA_UINT64 }, { "prefetch_metadata_hits", KSTAT_DATA_UINT64 }, { "prefetch_metadata_misses", KSTAT_DATA_UINT64 }, { "mru_hits", KSTAT_DATA_UINT64 }, { "mru_ghost_hits", KSTAT_DATA_UINT64 }, { "mfu_hits", KSTAT_DATA_UINT64 }, { "mfu_ghost_hits", KSTAT_DATA_UINT64 }, { "allocated", KSTAT_DATA_UINT64 }, { "deleted", KSTAT_DATA_UINT64 }, { "mutex_miss", KSTAT_DATA_UINT64 }, { "evict_skip", KSTAT_DATA_UINT64 }, { "evict_not_enough", KSTAT_DATA_UINT64 }, { "evict_l2_cached", KSTAT_DATA_UINT64 }, { "evict_l2_eligible", KSTAT_DATA_UINT64 }, { "evict_l2_ineligible", KSTAT_DATA_UINT64 }, { "evict_l2_skip", KSTAT_DATA_UINT64 }, { "hash_elements", KSTAT_DATA_UINT64 }, { "hash_elements_max", KSTAT_DATA_UINT64 }, { "hash_collisions", KSTAT_DATA_UINT64 }, { "hash_chains", KSTAT_DATA_UINT64 }, { "hash_chain_max", KSTAT_DATA_UINT64 }, { "p", KSTAT_DATA_UINT64 }, { "c", KSTAT_DATA_UINT64 }, { "c_min", KSTAT_DATA_UINT64 }, { "c_max", KSTAT_DATA_UINT64 }, { "size", KSTAT_DATA_UINT64 }, { "hdr_size", KSTAT_DATA_UINT64 }, { "data_size", KSTAT_DATA_UINT64 }, { "metadata_size", KSTAT_DATA_UINT64 }, { "other_size", KSTAT_DATA_UINT64 }, { "anon_size", KSTAT_DATA_UINT64 }, { "anon_evictable_data", KSTAT_DATA_UINT64 }, { "anon_evictable_metadata", KSTAT_DATA_UINT64 }, { "mru_size", KSTAT_DATA_UINT64 }, { "mru_evictable_data", KSTAT_DATA_UINT64 }, { "mru_evictable_metadata", KSTAT_DATA_UINT64 }, { "mru_ghost_size", KSTAT_DATA_UINT64 }, { "mru_ghost_evictable_data", KSTAT_DATA_UINT64 }, { "mru_ghost_evictable_metadata", KSTAT_DATA_UINT64 }, { "mfu_size", KSTAT_DATA_UINT64 }, { "mfu_evictable_data", KSTAT_DATA_UINT64 }, { "mfu_evictable_metadata", KSTAT_DATA_UINT64 }, { "mfu_ghost_size", KSTAT_DATA_UINT64 }, { "mfu_ghost_evictable_data", KSTAT_DATA_UINT64 }, { "mfu_ghost_evictable_metadata", KSTAT_DATA_UINT64 }, { "l2_hits", KSTAT_DATA_UINT64 }, { "l2_misses", KSTAT_DATA_UINT64 }, { "l2_feeds", KSTAT_DATA_UINT64 }, { "l2_rw_clash", KSTAT_DATA_UINT64 }, { "l2_read_bytes", KSTAT_DATA_UINT64 }, { "l2_write_bytes", KSTAT_DATA_UINT64 }, { "l2_writes_sent", KSTAT_DATA_UINT64 }, { "l2_writes_done", KSTAT_DATA_UINT64 }, { "l2_writes_error", KSTAT_DATA_UINT64 }, { "l2_writes_lock_retry", KSTAT_DATA_UINT64 }, { "l2_evict_lock_retry", KSTAT_DATA_UINT64 }, { "l2_evict_reading", KSTAT_DATA_UINT64 }, { "l2_evict_l1cached", KSTAT_DATA_UINT64 }, { "l2_free_on_write", KSTAT_DATA_UINT64 }, { "l2_cdata_free_on_write", KSTAT_DATA_UINT64 }, { "l2_abort_lowmem", KSTAT_DATA_UINT64 }, { "l2_cksum_bad", KSTAT_DATA_UINT64 }, { "l2_io_error", KSTAT_DATA_UINT64 }, { "l2_size", KSTAT_DATA_UINT64 }, { "l2_asize", KSTAT_DATA_UINT64 }, { "l2_hdr_size", KSTAT_DATA_UINT64 }, { "l2_compress_successes", KSTAT_DATA_UINT64 }, { "l2_compress_zeros", KSTAT_DATA_UINT64 }, { "l2_compress_failures", KSTAT_DATA_UINT64 }, { "l2_padding_needed", KSTAT_DATA_UINT64 }, { "l2_write_trylock_fail", KSTAT_DATA_UINT64 }, { "l2_write_passed_headroom", KSTAT_DATA_UINT64 }, { "l2_write_spa_mismatch", KSTAT_DATA_UINT64 }, { "l2_write_in_l2", KSTAT_DATA_UINT64 }, { "l2_write_io_in_progress", KSTAT_DATA_UINT64 }, { "l2_write_not_cacheable", KSTAT_DATA_UINT64 }, { "l2_write_full", KSTAT_DATA_UINT64 }, { "l2_write_buffer_iter", KSTAT_DATA_UINT64 }, { "l2_write_pios", KSTAT_DATA_UINT64 }, { "l2_write_buffer_bytes_scanned", KSTAT_DATA_UINT64 }, { "l2_write_buffer_list_iter", KSTAT_DATA_UINT64 }, { "l2_write_buffer_list_null_iter", KSTAT_DATA_UINT64 }, { "memory_throttle_count", KSTAT_DATA_UINT64 }, { "duplicate_buffers", KSTAT_DATA_UINT64 }, { "duplicate_buffers_size", KSTAT_DATA_UINT64 }, { "duplicate_reads", KSTAT_DATA_UINT64 }, { "arc_meta_used", KSTAT_DATA_UINT64 }, { "arc_meta_limit", KSTAT_DATA_UINT64 }, { "arc_meta_max", KSTAT_DATA_UINT64 }, { "arc_meta_min", KSTAT_DATA_UINT64 }, { "sync_wait_for_async", KSTAT_DATA_UINT64 }, { "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 }, }; #define ARCSTAT(stat) (arc_stats.stat.value.ui64) #define ARCSTAT_INCR(stat, val) \ atomic_add_64(&arc_stats.stat.value.ui64, (val)) #define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1) #define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1) #define ARCSTAT_MAX(stat, val) { \ uint64_t m; \ while ((val) > (m = arc_stats.stat.value.ui64) && \ (m != atomic_cas_64(&arc_stats.stat.value.ui64, m, (val)))) \ continue; \ } #define ARCSTAT_MAXSTAT(stat) \ ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64) /* * We define a macro to allow ARC hits/misses to be easily broken down by * two separate conditions, giving a total of four different subtypes for * each of hits and misses (so eight statistics total). */ #define ARCSTAT_CONDSTAT(cond1, stat1, notstat1, cond2, stat2, notstat2, stat) \ if (cond1) { \ if (cond2) { \ ARCSTAT_BUMP(arcstat_##stat1##_##stat2##_##stat); \ } else { \ ARCSTAT_BUMP(arcstat_##stat1##_##notstat2##_##stat); \ } \ } else { \ if (cond2) { \ ARCSTAT_BUMP(arcstat_##notstat1##_##stat2##_##stat); \ } else { \ ARCSTAT_BUMP(arcstat_##notstat1##_##notstat2##_##stat);\ } \ } kstat_t *arc_ksp; static arc_state_t *arc_anon; static arc_state_t *arc_mru; static arc_state_t *arc_mru_ghost; static arc_state_t *arc_mfu; static arc_state_t *arc_mfu_ghost; static arc_state_t *arc_l2c_only; /* * There are several ARC variables that are critical to export as kstats -- * but we don't want to have to grovel around in the kstat whenever we wish to * manipulate them. For these variables, we therefore define them to be in * terms of the statistic variable. This assures that we are not introducing * the possibility of inconsistency by having shadow copies of the variables, * while still allowing the code to be readable. */ #define arc_size ARCSTAT(arcstat_size) /* actual total arc size */ #define arc_p ARCSTAT(arcstat_p) /* target size of MRU */ #define arc_c ARCSTAT(arcstat_c) /* target size of cache */ #define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */ #define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */ #define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */ #define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */ #define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */ #define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */ #define L2ARC_IS_VALID_COMPRESS(_c_) \ ((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY) static int arc_no_grow; /* Don't try to grow cache size */ static uint64_t arc_tempreserve; static uint64_t arc_loaned_bytes; typedef struct arc_callback arc_callback_t; struct arc_callback { void *acb_private; arc_done_func_t *acb_done; arc_buf_t *acb_buf; zio_t *acb_zio_dummy; arc_callback_t *acb_next; }; typedef struct arc_write_callback arc_write_callback_t; struct arc_write_callback { void *awcb_private; arc_done_func_t *awcb_ready; arc_done_func_t *awcb_physdone; arc_done_func_t *awcb_done; arc_buf_t *awcb_buf; }; /* * ARC buffers are separated into multiple structs as a memory saving measure: * - Common fields struct, always defined, and embedded within it: * - L2-only fields, always allocated but undefined when not in L2ARC * - L1-only fields, only allocated when in L1ARC * * Buffer in L1 Buffer only in L2 * +------------------------+ +------------------------+ * | arc_buf_hdr_t | | arc_buf_hdr_t | * | | | | * | | | | * | | | | * +------------------------+ +------------------------+ * | l2arc_buf_hdr_t | | l2arc_buf_hdr_t | * | (undefined if L1-only) | | | * +------------------------+ +------------------------+ * | l1arc_buf_hdr_t | * | | * | | * | | * | | * +------------------------+ * * Because it's possible for the L2ARC to become extremely large, we can wind * up eating a lot of memory in L2ARC buffer headers, so the size of a header * is minimized by only allocating the fields necessary for an L1-cached buffer * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple * words in pointers. arc_hdr_realloc() is used to switch a header between * these two allocation states. */ typedef struct l1arc_buf_hdr { kmutex_t b_freeze_lock; #ifdef ZFS_DEBUG /* * used for debugging wtih kmem_flags - by allocating and freeing * b_thawed when the buffer is thawed, we get a record of the stack * trace that thawed it. */ void *b_thawed; #endif arc_buf_t *b_buf; uint32_t b_datacnt; /* for waiting on writes to complete */ kcondvar_t b_cv; /* protected by arc state mutex */ arc_state_t *b_state; multilist_node_t b_arc_node; /* updated atomically */ clock_t b_arc_access; /* self protecting */ refcount_t b_refcnt; arc_callback_t *b_acb; /* temporary buffer holder for in-flight compressed or padded data */ void *b_tmp_cdata; } l1arc_buf_hdr_t; typedef struct l2arc_dev l2arc_dev_t; typedef struct l2arc_buf_hdr { /* protected by arc_buf_hdr mutex */ l2arc_dev_t *b_dev; /* L2ARC device */ uint64_t b_daddr; /* disk address, offset byte */ /* real alloc'd buffer size depending on b_compress applied */ int32_t b_asize; uint8_t b_compress; list_node_t b_l2node; } l2arc_buf_hdr_t; struct arc_buf_hdr { /* protected by hash lock */ dva_t b_dva; uint64_t b_birth; /* * Even though this checksum is only set/verified when a buffer is in * the L1 cache, it needs to be in the set of common fields because it * must be preserved from the time before a buffer is written out to * L2ARC until after it is read back in. */ zio_cksum_t *b_freeze_cksum; arc_buf_hdr_t *b_hash_next; arc_flags_t b_flags; /* immutable */ int32_t b_size; uint64_t b_spa; /* L2ARC fields. Undefined when not in L2ARC. */ l2arc_buf_hdr_t b_l2hdr; /* L1ARC fields. Undefined when in l2arc_only state */ l1arc_buf_hdr_t b_l1hdr; }; -#ifdef _KERNEL +#if defined(__FreeBSD__) && defined(_KERNEL) static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS) { uint64_t val; int err; val = arc_meta_limit; err = sysctl_handle_64(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val <= 0 || val > arc_c_max) return (EINVAL); arc_meta_limit = val; return (0); } + +static int +sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS) +{ + uint64_t val; + int err; + + val = zfs_arc_max; + err = sysctl_handle_64(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + if (val < arc_abs_min || val > kmem_size()) + return (EINVAL); + if (val < arc_c_min) + return (EINVAL); + if (zfs_arc_meta_limit > 0 && val < zfs_arc_meta_limit) + return (EINVAL); + + arc_c_max = val; + + arc_c = arc_c_max; + arc_p = (arc_c >> 1); + + if (zfs_arc_meta_limit == 0) { + /* limit meta-data to 1/4 of the arc capacity */ + arc_meta_limit = arc_c_max / 4; + } + + /* if kmem_flags are set, lets try to use less memory */ + if (kmem_debugging()) + arc_c = arc_c / 2; + + zfs_arc_max = arc_c; + + return (0); +} + +static int +sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS) +{ + uint64_t val; + int err; + + val = zfs_arc_min; + err = sysctl_handle_64(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + if (val < arc_abs_min || val > arc_c_max) + return (EINVAL); + + arc_c_min = val; + + if (zfs_arc_meta_min == 0) + arc_meta_min = arc_c_min / 2; + + if (arc_c < arc_c_min) + arc_c = arc_c_min; + + zfs_arc_min = arc_c_min; + + return (0); +} #endif static arc_buf_t *arc_eviction_list; static arc_buf_hdr_t arc_eviction_hdr; #define GHOST_STATE(state) \ ((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \ (state) == arc_l2c_only) #define HDR_IN_HASH_TABLE(hdr) ((hdr)->b_flags & ARC_FLAG_IN_HASH_TABLE) #define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) #define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_FLAG_IO_ERROR) #define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH) #define HDR_FREED_IN_READ(hdr) ((hdr)->b_flags & ARC_FLAG_FREED_IN_READ) #define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_FLAG_BUF_AVAILABLE) #define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_FLAG_L2CACHE) #define HDR_L2COMPRESS(hdr) ((hdr)->b_flags & ARC_FLAG_L2COMPRESS) #define HDR_L2_READING(hdr) \ (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) && \ ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)) #define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING) #define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_FLAG_L2_EVICTED) #define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD) #define HDR_ISTYPE_METADATA(hdr) \ ((hdr)->b_flags & ARC_FLAG_BUFC_METADATA) #define HDR_ISTYPE_DATA(hdr) (!HDR_ISTYPE_METADATA(hdr)) #define HDR_HAS_L1HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR) #define HDR_HAS_L2HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR) /* * Other sizes */ #define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t)) #define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr)) /* * Hash table routines */ #define HT_LOCK_PAD CACHE_LINE_SIZE struct ht_lock { kmutex_t ht_lock; #ifdef _KERNEL unsigned char pad[(HT_LOCK_PAD - sizeof (kmutex_t))]; #endif }; #define BUF_LOCKS 256 typedef struct buf_hash_table { uint64_t ht_mask; arc_buf_hdr_t **ht_table; struct ht_lock ht_locks[BUF_LOCKS] __aligned(CACHE_LINE_SIZE); } buf_hash_table_t; static buf_hash_table_t buf_hash_table; #define BUF_HASH_INDEX(spa, dva, birth) \ (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask) #define BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)]) #define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock)) #define HDR_LOCK(hdr) \ (BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth))) uint64_t zfs_crc64_table[256]; /* * Level 2 ARC */ #define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */ #define L2ARC_HEADROOM 2 /* num of writes */ /* * If we discover during ARC scan any buffers to be compressed, we boost * our headroom for the next scanning cycle by this percentage multiple. */ #define L2ARC_HEADROOM_BOOST 200 #define L2ARC_FEED_SECS 1 /* caching interval secs */ #define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */ /* * Used to distinguish headers that are being process by * l2arc_write_buffers(), but have yet to be assigned to a l2arc disk * address. This can happen when the header is added to the l2arc's list * of buffers to write in the first stage of l2arc_write_buffers(), but * has not yet been written out which happens in the second stage of * l2arc_write_buffers(). */ #define L2ARC_ADDR_UNSET ((uint64_t)(-1)) #define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent) #define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done) /* L2ARC Performance Tunables */ uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* default max write size */ uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra write during warmup */ uint64_t l2arc_headroom = L2ARC_HEADROOM; /* number of dev writes */ uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval milliseconds */ boolean_t l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ boolean_t l2arc_feed_again = B_TRUE; /* turbo warmup */ boolean_t l2arc_norw = B_TRUE; /* no reads during writes */ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RW, &l2arc_write_max, 0, "max write size"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RW, &l2arc_write_boost, 0, "extra write during warmup"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RW, &l2arc_headroom, 0, "number of dev writes"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RW, &l2arc_feed_secs, 0, "interval seconds"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RW, &l2arc_feed_min_ms, 0, "min interval milliseconds"); SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RW, &l2arc_noprefetch, 0, "don't cache prefetch bufs"); SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RW, &l2arc_feed_again, 0, "turbo warmup"); SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RW, &l2arc_norw, 0, "no reads during writes"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD, &ARC_anon.arcs_size.rc_count, 0, "size of anonymous state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_lsize, CTLFLAG_RD, &ARC_anon.arcs_lsize[ARC_BUFC_METADATA], 0, "size of anonymous state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_lsize, CTLFLAG_RD, &ARC_anon.arcs_lsize[ARC_BUFC_DATA], 0, "size of anonymous state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD, &ARC_mru.arcs_size.rc_count, 0, "size of mru state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_lsize, CTLFLAG_RD, &ARC_mru.arcs_lsize[ARC_BUFC_METADATA], 0, "size of metadata in mru state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_lsize, CTLFLAG_RD, &ARC_mru.arcs_lsize[ARC_BUFC_DATA], 0, "size of data in mru state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD, &ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_lsize, CTLFLAG_RD, &ARC_mru_ghost.arcs_lsize[ARC_BUFC_METADATA], 0, "size of metadata in mru ghost state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_lsize, CTLFLAG_RD, &ARC_mru_ghost.arcs_lsize[ARC_BUFC_DATA], 0, "size of data in mru ghost state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD, &ARC_mfu.arcs_size.rc_count, 0, "size of mfu state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_lsize, CTLFLAG_RD, &ARC_mfu.arcs_lsize[ARC_BUFC_METADATA], 0, "size of metadata in mfu state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_lsize, CTLFLAG_RD, &ARC_mfu.arcs_lsize[ARC_BUFC_DATA], 0, "size of data in mfu state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD, &ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_lsize, CTLFLAG_RD, &ARC_mfu_ghost.arcs_lsize[ARC_BUFC_METADATA], 0, "size of metadata in mfu ghost state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_lsize, CTLFLAG_RD, &ARC_mfu_ghost.arcs_lsize[ARC_BUFC_DATA], 0, "size of data in mfu ghost state"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD, &ARC_l2c_only.arcs_size.rc_count, 0, "size of mru state"); /* * L2ARC Internals */ struct l2arc_dev { vdev_t *l2ad_vdev; /* vdev */ spa_t *l2ad_spa; /* spa */ uint64_t l2ad_hand; /* next write location */ uint64_t l2ad_start; /* first addr on device */ uint64_t l2ad_end; /* last addr on device */ boolean_t l2ad_first; /* first sweep through */ boolean_t l2ad_writing; /* currently writing */ kmutex_t l2ad_mtx; /* lock for buffer list */ list_t l2ad_buflist; /* buffer list */ list_node_t l2ad_node; /* device list node */ refcount_t l2ad_alloc; /* allocated bytes */ }; static list_t L2ARC_dev_list; /* device list */ static list_t *l2arc_dev_list; /* device list pointer */ static kmutex_t l2arc_dev_mtx; /* device list mutex */ static l2arc_dev_t *l2arc_dev_last; /* last device used */ static list_t L2ARC_free_on_write; /* free after write buf list */ static list_t *l2arc_free_on_write; /* free after write list ptr */ static kmutex_t l2arc_free_on_write_mtx; /* mutex for list */ static uint64_t l2arc_ndev; /* number of devices */ typedef struct l2arc_read_callback { arc_buf_t *l2rcb_buf; /* read buffer */ spa_t *l2rcb_spa; /* spa */ blkptr_t l2rcb_bp; /* original blkptr */ zbookmark_phys_t l2rcb_zb; /* original bookmark */ int l2rcb_flags; /* original flags */ enum zio_compress l2rcb_compress; /* applied compress */ void *l2rcb_data; /* temporary buffer */ } l2arc_read_callback_t; typedef struct l2arc_write_callback { l2arc_dev_t *l2wcb_dev; /* device info */ arc_buf_hdr_t *l2wcb_head; /* head of write buflist */ } l2arc_write_callback_t; typedef struct l2arc_data_free { /* protected by l2arc_free_on_write_mtx */ void *l2df_data; size_t l2df_size; void (*l2df_func)(void *, size_t); list_node_t l2df_list_node; } l2arc_data_free_t; static kmutex_t l2arc_feed_thr_lock; static kcondvar_t l2arc_feed_thr_cv; static uint8_t l2arc_thread_exit; static void arc_get_data_buf(arc_buf_t *); static void arc_access(arc_buf_hdr_t *, kmutex_t *); static boolean_t arc_is_overflowing(); static void arc_buf_watch(arc_buf_t *); static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *); static uint32_t arc_bufc_to_flags(arc_buf_contents_t); static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *); static void l2arc_read_done(zio_t *); static boolean_t l2arc_transform_buf(arc_buf_hdr_t *, boolean_t); static void l2arc_decompress_zio(zio_t *, arc_buf_hdr_t *, enum zio_compress); static void l2arc_release_cdata_buf(arc_buf_hdr_t *); static void l2arc_trim(const arc_buf_hdr_t *hdr) { l2arc_dev_t *dev = hdr->b_l2hdr.b_dev; ASSERT(HDR_HAS_L2HDR(hdr)); ASSERT(MUTEX_HELD(&dev->l2ad_mtx)); if (hdr->b_l2hdr.b_daddr == L2ARC_ADDR_UNSET) return; if (hdr->b_l2hdr.b_asize != 0) { trim_map_free(dev->l2ad_vdev, hdr->b_l2hdr.b_daddr, hdr->b_l2hdr.b_asize, 0); } else { ASSERT3U(hdr->b_l2hdr.b_compress, ==, ZIO_COMPRESS_EMPTY); } } static uint64_t buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth) { uint8_t *vdva = (uint8_t *)dva; uint64_t crc = -1ULL; int i; ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); for (i = 0; i < sizeof (dva_t); i++) crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ vdva[i]) & 0xFF]; crc ^= (spa>>8) ^ birth; return (crc); } #define BUF_EMPTY(buf) \ ((buf)->b_dva.dva_word[0] == 0 && \ (buf)->b_dva.dva_word[1] == 0) #define BUF_EQUAL(spa, dva, birth, buf) \ ((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) && \ ((buf)->b_dva.dva_word[1] == (dva)->dva_word[1]) && \ ((buf)->b_birth == birth) && ((buf)->b_spa == spa) static void buf_discard_identity(arc_buf_hdr_t *hdr) { hdr->b_dva.dva_word[0] = 0; hdr->b_dva.dva_word[1] = 0; hdr->b_birth = 0; } static arc_buf_hdr_t * buf_hash_find(uint64_t spa, const blkptr_t *bp, kmutex_t **lockp) { const dva_t *dva = BP_IDENTITY(bp); uint64_t birth = BP_PHYSICAL_BIRTH(bp); uint64_t idx = BUF_HASH_INDEX(spa, dva, birth); kmutex_t *hash_lock = BUF_HASH_LOCK(idx); arc_buf_hdr_t *hdr; mutex_enter(hash_lock); for (hdr = buf_hash_table.ht_table[idx]; hdr != NULL; hdr = hdr->b_hash_next) { if (BUF_EQUAL(spa, dva, birth, hdr)) { *lockp = hash_lock; return (hdr); } } mutex_exit(hash_lock); *lockp = NULL; return (NULL); } /* * Insert an entry into the hash table. If there is already an element * equal to elem in the hash table, then the already existing element * will be returned and the new element will not be inserted. * Otherwise returns NULL. * If lockp == NULL, the caller is assumed to already hold the hash lock. */ static arc_buf_hdr_t * buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp) { uint64_t idx = BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth); kmutex_t *hash_lock = BUF_HASH_LOCK(idx); arc_buf_hdr_t *fhdr; uint32_t i; ASSERT(!DVA_IS_EMPTY(&hdr->b_dva)); ASSERT(hdr->b_birth != 0); ASSERT(!HDR_IN_HASH_TABLE(hdr)); if (lockp != NULL) { *lockp = hash_lock; mutex_enter(hash_lock); } else { ASSERT(MUTEX_HELD(hash_lock)); } for (fhdr = buf_hash_table.ht_table[idx], i = 0; fhdr != NULL; fhdr = fhdr->b_hash_next, i++) { if (BUF_EQUAL(hdr->b_spa, &hdr->b_dva, hdr->b_birth, fhdr)) return (fhdr); } hdr->b_hash_next = buf_hash_table.ht_table[idx]; buf_hash_table.ht_table[idx] = hdr; hdr->b_flags |= ARC_FLAG_IN_HASH_TABLE; /* collect some hash table performance data */ if (i > 0) { ARCSTAT_BUMP(arcstat_hash_collisions); if (i == 1) ARCSTAT_BUMP(arcstat_hash_chains); ARCSTAT_MAX(arcstat_hash_chain_max, i); } ARCSTAT_BUMP(arcstat_hash_elements); ARCSTAT_MAXSTAT(arcstat_hash_elements); return (NULL); } static void buf_hash_remove(arc_buf_hdr_t *hdr) { arc_buf_hdr_t *fhdr, **hdrp; uint64_t idx = BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth); ASSERT(MUTEX_HELD(BUF_HASH_LOCK(idx))); ASSERT(HDR_IN_HASH_TABLE(hdr)); hdrp = &buf_hash_table.ht_table[idx]; while ((fhdr = *hdrp) != hdr) { ASSERT(fhdr != NULL); hdrp = &fhdr->b_hash_next; } *hdrp = hdr->b_hash_next; hdr->b_hash_next = NULL; hdr->b_flags &= ~ARC_FLAG_IN_HASH_TABLE; /* collect some hash table performance data */ ARCSTAT_BUMPDOWN(arcstat_hash_elements); if (buf_hash_table.ht_table[idx] && buf_hash_table.ht_table[idx]->b_hash_next == NULL) ARCSTAT_BUMPDOWN(arcstat_hash_chains); } /* * Global data structures and functions for the buf kmem cache. */ static kmem_cache_t *hdr_full_cache; static kmem_cache_t *hdr_l2only_cache; static kmem_cache_t *buf_cache; static void buf_fini(void) { int i; kmem_free(buf_hash_table.ht_table, (buf_hash_table.ht_mask + 1) * sizeof (void *)); for (i = 0; i < BUF_LOCKS; i++) mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock); kmem_cache_destroy(hdr_full_cache); kmem_cache_destroy(hdr_l2only_cache); kmem_cache_destroy(buf_cache); } /* * Constructor callback - called when the cache is empty * and a new buf is requested. */ /* ARGSUSED */ static int hdr_full_cons(void *vbuf, void *unused, int kmflag) { arc_buf_hdr_t *hdr = vbuf; bzero(hdr, HDR_FULL_SIZE); cv_init(&hdr->b_l1hdr.b_cv, NULL, CV_DEFAULT, NULL); refcount_create(&hdr->b_l1hdr.b_refcnt); mutex_init(&hdr->b_l1hdr.b_freeze_lock, NULL, MUTEX_DEFAULT, NULL); multilist_link_init(&hdr->b_l1hdr.b_arc_node); arc_space_consume(HDR_FULL_SIZE, ARC_SPACE_HDRS); return (0); } /* ARGSUSED */ static int hdr_l2only_cons(void *vbuf, void *unused, int kmflag) { arc_buf_hdr_t *hdr = vbuf; bzero(hdr, HDR_L2ONLY_SIZE); arc_space_consume(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS); return (0); } /* ARGSUSED */ static int buf_cons(void *vbuf, void *unused, int kmflag) { arc_buf_t *buf = vbuf; bzero(buf, sizeof (arc_buf_t)); mutex_init(&buf->b_evict_lock, NULL, MUTEX_DEFAULT, NULL); arc_space_consume(sizeof (arc_buf_t), ARC_SPACE_HDRS); return (0); } /* * Destructor callback - called when a cached buf is * no longer required. */ /* ARGSUSED */ static void hdr_full_dest(void *vbuf, void *unused) { arc_buf_hdr_t *hdr = vbuf; ASSERT(BUF_EMPTY(hdr)); cv_destroy(&hdr->b_l1hdr.b_cv); refcount_destroy(&hdr->b_l1hdr.b_refcnt); mutex_destroy(&hdr->b_l1hdr.b_freeze_lock); ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); arc_space_return(HDR_FULL_SIZE, ARC_SPACE_HDRS); } /* ARGSUSED */ static void hdr_l2only_dest(void *vbuf, void *unused) { arc_buf_hdr_t *hdr = vbuf; ASSERT(BUF_EMPTY(hdr)); arc_space_return(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS); } /* ARGSUSED */ static void buf_dest(void *vbuf, void *unused) { arc_buf_t *buf = vbuf; mutex_destroy(&buf->b_evict_lock); arc_space_return(sizeof (arc_buf_t), ARC_SPACE_HDRS); } /* * Reclaim callback -- invoked when memory is low. */ /* ARGSUSED */ static void hdr_recl(void *unused) { dprintf("hdr_recl called\n"); /* * umem calls the reclaim func when we destroy the buf cache, * which is after we do arc_fini(). */ if (!arc_dead) cv_signal(&arc_reclaim_thread_cv); } static void buf_init(void) { uint64_t *ct; uint64_t hsize = 1ULL << 12; int i, j; /* * The hash table is big enough to fill all of physical memory * with an average block size of zfs_arc_average_blocksize (default 8K). * By default, the table will take up * totalmem * sizeof(void*) / 8K (1MB per GB with 8-byte pointers). */ while (hsize * zfs_arc_average_blocksize < (uint64_t)physmem * PAGESIZE) hsize <<= 1; retry: buf_hash_table.ht_mask = hsize - 1; buf_hash_table.ht_table = kmem_zalloc(hsize * sizeof (void*), KM_NOSLEEP); if (buf_hash_table.ht_table == NULL) { ASSERT(hsize > (1ULL << 8)); hsize >>= 1; goto retry; } hdr_full_cache = kmem_cache_create("arc_buf_hdr_t_full", HDR_FULL_SIZE, 0, hdr_full_cons, hdr_full_dest, hdr_recl, NULL, NULL, 0); hdr_l2only_cache = kmem_cache_create("arc_buf_hdr_t_l2only", HDR_L2ONLY_SIZE, 0, hdr_l2only_cons, hdr_l2only_dest, hdr_recl, NULL, NULL, 0); buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t), 0, buf_cons, buf_dest, NULL, NULL, NULL, 0); for (i = 0; i < 256; i++) for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--) *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY); for (i = 0; i < BUF_LOCKS; i++) { mutex_init(&buf_hash_table.ht_locks[i].ht_lock, NULL, MUTEX_DEFAULT, NULL); } } /* * Transition between the two allocation states for the arc_buf_hdr struct. * The arc_buf_hdr struct can be allocated with (hdr_full_cache) or without * (hdr_l2only_cache) the fields necessary for the L1 cache - the smaller * version is used when a cache buffer is only in the L2ARC in order to reduce * memory usage. */ static arc_buf_hdr_t * arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new) { ASSERT(HDR_HAS_L2HDR(hdr)); arc_buf_hdr_t *nhdr; l2arc_dev_t *dev = hdr->b_l2hdr.b_dev; ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) || (old == hdr_l2only_cache && new == hdr_full_cache)); nhdr = kmem_cache_alloc(new, KM_PUSHPAGE); ASSERT(MUTEX_HELD(HDR_LOCK(hdr))); buf_hash_remove(hdr); bcopy(hdr, nhdr, HDR_L2ONLY_SIZE); if (new == hdr_full_cache) { nhdr->b_flags |= ARC_FLAG_HAS_L1HDR; /* * arc_access and arc_change_state need to be aware that a * header has just come out of L2ARC, so we set its state to * l2c_only even though it's about to change. */ nhdr->b_l1hdr.b_state = arc_l2c_only; /* Verify previous threads set to NULL before freeing */ ASSERT3P(nhdr->b_l1hdr.b_tmp_cdata, ==, NULL); } else { ASSERT(hdr->b_l1hdr.b_buf == NULL); ASSERT0(hdr->b_l1hdr.b_datacnt); /* * If we've reached here, We must have been called from * arc_evict_hdr(), as such we should have already been * removed from any ghost list we were previously on * (which protects us from racing with arc_evict_state), * thus no locking is needed during this check. */ ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); /* * A buffer must not be moved into the arc_l2c_only * state if it's not finished being written out to the * l2arc device. Otherwise, the b_l1hdr.b_tmp_cdata field * might try to be accessed, even though it was removed. */ VERIFY(!HDR_L2_WRITING(hdr)); VERIFY3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL); #ifdef ZFS_DEBUG if (hdr->b_l1hdr.b_thawed != NULL) { kmem_free(hdr->b_l1hdr.b_thawed, 1); hdr->b_l1hdr.b_thawed = NULL; } #endif nhdr->b_flags &= ~ARC_FLAG_HAS_L1HDR; } /* * The header has been reallocated so we need to re-insert it into any * lists it was on. */ (void) buf_hash_insert(nhdr, NULL); ASSERT(list_link_active(&hdr->b_l2hdr.b_l2node)); mutex_enter(&dev->l2ad_mtx); /* * We must place the realloc'ed header back into the list at * the same spot. Otherwise, if it's placed earlier in the list, * l2arc_write_buffers() could find it during the function's * write phase, and try to write it out to the l2arc. */ list_insert_after(&dev->l2ad_buflist, hdr, nhdr); list_remove(&dev->l2ad_buflist, hdr); mutex_exit(&dev->l2ad_mtx); /* * Since we're using the pointer address as the tag when * incrementing and decrementing the l2ad_alloc refcount, we * must remove the old pointer (that we're about to destroy) and * add the new pointer to the refcount. Otherwise we'd remove * the wrong pointer address when calling arc_hdr_destroy() later. */ (void) refcount_remove_many(&dev->l2ad_alloc, hdr->b_l2hdr.b_asize, hdr); (void) refcount_add_many(&dev->l2ad_alloc, nhdr->b_l2hdr.b_asize, nhdr); buf_discard_identity(hdr); hdr->b_freeze_cksum = NULL; kmem_cache_free(old, hdr); return (nhdr); } #define ARC_MINTIME (hz>>4) /* 62 ms */ static void arc_cksum_verify(arc_buf_t *buf) { zio_cksum_t zc; if (!(zfs_flags & ZFS_DEBUG_MODIFY)) return; mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock); if (buf->b_hdr->b_freeze_cksum == NULL || HDR_IO_ERROR(buf->b_hdr)) { mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock); return; } fletcher_2_native(buf->b_data, buf->b_hdr->b_size, NULL, &zc); if (!ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc)) panic("buffer modified while frozen!"); mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock); } static int arc_cksum_equal(arc_buf_t *buf) { zio_cksum_t zc; int equal; mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock); fletcher_2_native(buf->b_data, buf->b_hdr->b_size, NULL, &zc); equal = ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc); mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock); return (equal); } static void arc_cksum_compute(arc_buf_t *buf, boolean_t force) { if (!force && !(zfs_flags & ZFS_DEBUG_MODIFY)) return; mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock); if (buf->b_hdr->b_freeze_cksum != NULL) { mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock); return; } buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP); fletcher_2_native(buf->b_data, buf->b_hdr->b_size, NULL, buf->b_hdr->b_freeze_cksum); mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock); #ifdef illumos arc_buf_watch(buf); #endif } #ifdef illumos #ifndef _KERNEL typedef struct procctl { long cmd; prwatch_t prwatch; } procctl_t; #endif /* ARGSUSED */ static void arc_buf_unwatch(arc_buf_t *buf) { #ifndef _KERNEL if (arc_watch) { int result; procctl_t ctl; ctl.cmd = PCWATCH; ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data; ctl.prwatch.pr_size = 0; ctl.prwatch.pr_wflags = 0; result = write(arc_procfd, &ctl, sizeof (ctl)); ASSERT3U(result, ==, sizeof (ctl)); } #endif } /* ARGSUSED */ static void arc_buf_watch(arc_buf_t *buf) { #ifndef _KERNEL if (arc_watch) { int result; procctl_t ctl; ctl.cmd = PCWATCH; ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data; ctl.prwatch.pr_size = buf->b_hdr->b_size; ctl.prwatch.pr_wflags = WA_WRITE; result = write(arc_procfd, &ctl, sizeof (ctl)); ASSERT3U(result, ==, sizeof (ctl)); } #endif } #endif /* illumos */ static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *hdr) { if (HDR_ISTYPE_METADATA(hdr)) { return (ARC_BUFC_METADATA); } else { return (ARC_BUFC_DATA); } } static uint32_t arc_bufc_to_flags(arc_buf_contents_t type) { switch (type) { case ARC_BUFC_DATA: /* metadata field is 0 if buffer contains normal data */ return (0); case ARC_BUFC_METADATA: return (ARC_FLAG_BUFC_METADATA); default: break; } panic("undefined ARC buffer type!"); return ((uint32_t)-1); } void arc_buf_thaw(arc_buf_t *buf) { if (zfs_flags & ZFS_DEBUG_MODIFY) { if (buf->b_hdr->b_l1hdr.b_state != arc_anon) panic("modifying non-anon buffer!"); if (HDR_IO_IN_PROGRESS(buf->b_hdr)) panic("modifying buffer while i/o in progress!"); arc_cksum_verify(buf); } mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock); if (buf->b_hdr->b_freeze_cksum != NULL) { kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t)); buf->b_hdr->b_freeze_cksum = NULL; } #ifdef ZFS_DEBUG if (zfs_flags & ZFS_DEBUG_MODIFY) { if (buf->b_hdr->b_l1hdr.b_thawed != NULL) kmem_free(buf->b_hdr->b_l1hdr.b_thawed, 1); buf->b_hdr->b_l1hdr.b_thawed = kmem_alloc(1, KM_SLEEP); } #endif mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock); #ifdef illumos arc_buf_unwatch(buf); #endif } void arc_buf_freeze(arc_buf_t *buf) { kmutex_t *hash_lock; if (!(zfs_flags & ZFS_DEBUG_MODIFY)) return; hash_lock = HDR_LOCK(buf->b_hdr); mutex_enter(hash_lock); ASSERT(buf->b_hdr->b_freeze_cksum != NULL || buf->b_hdr->b_l1hdr.b_state == arc_anon); arc_cksum_compute(buf, B_FALSE); mutex_exit(hash_lock); } static void add_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag) { ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(MUTEX_HELD(hash_lock)); arc_state_t *state = hdr->b_l1hdr.b_state; if ((refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) && (state != arc_anon)) { /* We don't use the L2-only state list. */ if (state != arc_l2c_only) { arc_buf_contents_t type = arc_buf_type(hdr); uint64_t delta = hdr->b_size * hdr->b_l1hdr.b_datacnt; multilist_t *list = &state->arcs_list[type]; uint64_t *size = &state->arcs_lsize[type]; multilist_remove(list, hdr); if (GHOST_STATE(state)) { ASSERT0(hdr->b_l1hdr.b_datacnt); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); delta = hdr->b_size; } ASSERT(delta > 0); ASSERT3U(*size, >=, delta); atomic_add_64(size, -delta); } /* remove the prefetch flag if we get a reference */ hdr->b_flags &= ~ARC_FLAG_PREFETCH; } } static int remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag) { int cnt; arc_state_t *state = hdr->b_l1hdr.b_state; ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(state == arc_anon || MUTEX_HELD(hash_lock)); ASSERT(!GHOST_STATE(state)); /* * arc_l2c_only counts as a ghost state so we don't need to explicitly * check to prevent usage of the arc_l2c_only list. */ if (((cnt = refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) && (state != arc_anon)) { arc_buf_contents_t type = arc_buf_type(hdr); multilist_t *list = &state->arcs_list[type]; uint64_t *size = &state->arcs_lsize[type]; multilist_insert(list, hdr); ASSERT(hdr->b_l1hdr.b_datacnt > 0); atomic_add_64(size, hdr->b_size * hdr->b_l1hdr.b_datacnt); } return (cnt); } /* * Move the supplied buffer to the indicated state. The hash lock * for the buffer must be held by the caller. */ static void arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr, kmutex_t *hash_lock) { arc_state_t *old_state; int64_t refcnt; uint32_t datacnt; uint64_t from_delta, to_delta; arc_buf_contents_t buftype = arc_buf_type(hdr); /* * We almost always have an L1 hdr here, since we call arc_hdr_realloc() * in arc_read() when bringing a buffer out of the L2ARC. However, the * L1 hdr doesn't always exist when we change state to arc_anon before * destroying a header, in which case reallocating to add the L1 hdr is * pointless. */ if (HDR_HAS_L1HDR(hdr)) { old_state = hdr->b_l1hdr.b_state; refcnt = refcount_count(&hdr->b_l1hdr.b_refcnt); datacnt = hdr->b_l1hdr.b_datacnt; } else { old_state = arc_l2c_only; refcnt = 0; datacnt = 0; } ASSERT(MUTEX_HELD(hash_lock)); ASSERT3P(new_state, !=, old_state); ASSERT(refcnt == 0 || datacnt > 0); ASSERT(!GHOST_STATE(new_state) || datacnt == 0); ASSERT(old_state != arc_anon || datacnt <= 1); from_delta = to_delta = datacnt * hdr->b_size; /* * If this buffer is evictable, transfer it from the * old state list to the new state list. */ if (refcnt == 0) { if (old_state != arc_anon && old_state != arc_l2c_only) { uint64_t *size = &old_state->arcs_lsize[buftype]; ASSERT(HDR_HAS_L1HDR(hdr)); multilist_remove(&old_state->arcs_list[buftype], hdr); /* * If prefetching out of the ghost cache, * we will have a non-zero datacnt. */ if (GHOST_STATE(old_state) && datacnt == 0) { /* ghost elements have a ghost size */ ASSERT(hdr->b_l1hdr.b_buf == NULL); from_delta = hdr->b_size; } ASSERT3U(*size, >=, from_delta); atomic_add_64(size, -from_delta); } if (new_state != arc_anon && new_state != arc_l2c_only) { uint64_t *size = &new_state->arcs_lsize[buftype]; /* * An L1 header always exists here, since if we're * moving to some L1-cached state (i.e. not l2c_only or * anonymous), we realloc the header to add an L1hdr * beforehand. */ ASSERT(HDR_HAS_L1HDR(hdr)); multilist_insert(&new_state->arcs_list[buftype], hdr); /* ghost elements have a ghost size */ if (GHOST_STATE(new_state)) { ASSERT0(datacnt); ASSERT(hdr->b_l1hdr.b_buf == NULL); to_delta = hdr->b_size; } atomic_add_64(size, to_delta); } } ASSERT(!BUF_EMPTY(hdr)); if (new_state == arc_anon && HDR_IN_HASH_TABLE(hdr)) buf_hash_remove(hdr); /* adjust state sizes (ignore arc_l2c_only) */ if (to_delta && new_state != arc_l2c_only) { ASSERT(HDR_HAS_L1HDR(hdr)); if (GHOST_STATE(new_state)) { ASSERT0(datacnt); /* * We moving a header to a ghost state, we first * remove all arc buffers. Thus, we'll have a * datacnt of zero, and no arc buffer to use for * the reference. As a result, we use the arc * header pointer for the reference. */ (void) refcount_add_many(&new_state->arcs_size, hdr->b_size, hdr); } else { ASSERT3U(datacnt, !=, 0); /* * Each individual buffer holds a unique reference, * thus we must remove each of these references one * at a time. */ for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { (void) refcount_add_many(&new_state->arcs_size, hdr->b_size, buf); } } } if (from_delta && old_state != arc_l2c_only) { ASSERT(HDR_HAS_L1HDR(hdr)); if (GHOST_STATE(old_state)) { /* * When moving a header off of a ghost state, * there's the possibility for datacnt to be * non-zero. This is because we first add the * arc buffer to the header prior to changing * the header's state. Since we used the header * for the reference when putting the header on * the ghost state, we must balance that and use * the header when removing off the ghost state * (even though datacnt is non zero). */ IMPLY(datacnt == 0, new_state == arc_anon || new_state == arc_l2c_only); (void) refcount_remove_many(&old_state->arcs_size, hdr->b_size, hdr); } else { ASSERT3P(datacnt, !=, 0); /* * Each individual buffer holds a unique reference, * thus we must remove each of these references one * at a time. */ for (arc_buf_t *buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) { (void) refcount_remove_many( &old_state->arcs_size, hdr->b_size, buf); } } } if (HDR_HAS_L1HDR(hdr)) hdr->b_l1hdr.b_state = new_state; /* * L2 headers should never be on the L2 state list since they don't * have L1 headers allocated. */ ASSERT(multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) && multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA])); } void arc_space_consume(uint64_t space, arc_space_type_t type) { ASSERT(type >= 0 && type < ARC_SPACE_NUMTYPES); switch (type) { case ARC_SPACE_DATA: ARCSTAT_INCR(arcstat_data_size, space); break; case ARC_SPACE_META: ARCSTAT_INCR(arcstat_metadata_size, space); break; case ARC_SPACE_OTHER: ARCSTAT_INCR(arcstat_other_size, space); break; case ARC_SPACE_HDRS: ARCSTAT_INCR(arcstat_hdr_size, space); break; case ARC_SPACE_L2HDRS: ARCSTAT_INCR(arcstat_l2_hdr_size, space); break; } if (type != ARC_SPACE_DATA) ARCSTAT_INCR(arcstat_meta_used, space); atomic_add_64(&arc_size, space); } void arc_space_return(uint64_t space, arc_space_type_t type) { ASSERT(type >= 0 && type < ARC_SPACE_NUMTYPES); switch (type) { case ARC_SPACE_DATA: ARCSTAT_INCR(arcstat_data_size, -space); break; case ARC_SPACE_META: ARCSTAT_INCR(arcstat_metadata_size, -space); break; case ARC_SPACE_OTHER: ARCSTAT_INCR(arcstat_other_size, -space); break; case ARC_SPACE_HDRS: ARCSTAT_INCR(arcstat_hdr_size, -space); break; case ARC_SPACE_L2HDRS: ARCSTAT_INCR(arcstat_l2_hdr_size, -space); break; } if (type != ARC_SPACE_DATA) { ASSERT(arc_meta_used >= space); if (arc_meta_max < arc_meta_used) arc_meta_max = arc_meta_used; ARCSTAT_INCR(arcstat_meta_used, -space); } ASSERT(arc_size >= space); atomic_add_64(&arc_size, -space); } arc_buf_t * arc_buf_alloc(spa_t *spa, int32_t size, void *tag, arc_buf_contents_t type) { arc_buf_hdr_t *hdr; arc_buf_t *buf; ASSERT3U(size, >, 0); hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); ASSERT(BUF_EMPTY(hdr)); ASSERT3P(hdr->b_freeze_cksum, ==, NULL); hdr->b_size = size; hdr->b_spa = spa_load_guid(spa); buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); buf->b_hdr = hdr; buf->b_data = NULL; buf->b_efunc = NULL; buf->b_private = NULL; buf->b_next = NULL; hdr->b_flags = arc_bufc_to_flags(type); hdr->b_flags |= ARC_FLAG_HAS_L1HDR; hdr->b_l1hdr.b_buf = buf; hdr->b_l1hdr.b_state = arc_anon; hdr->b_l1hdr.b_arc_access = 0; hdr->b_l1hdr.b_datacnt = 1; hdr->b_l1hdr.b_tmp_cdata = NULL; arc_get_data_buf(buf); ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); (void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag); return (buf); } static char *arc_onloan_tag = "onloan"; /* * Loan out an anonymous arc buffer. Loaned buffers are not counted as in * flight data by arc_tempreserve_space() until they are "returned". Loaned * buffers must be returned to the arc before they can be used by the DMU or * freed. */ arc_buf_t * arc_loan_buf(spa_t *spa, int size) { arc_buf_t *buf; buf = arc_buf_alloc(spa, size, arc_onloan_tag, ARC_BUFC_DATA); atomic_add_64(&arc_loaned_bytes, size); return (buf); } /* * Return a loaned arc buffer to the arc. */ void arc_return_buf(arc_buf_t *buf, void *tag) { arc_buf_hdr_t *hdr = buf->b_hdr; ASSERT(buf->b_data != NULL); ASSERT(HDR_HAS_L1HDR(hdr)); (void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag); (void) refcount_remove(&hdr->b_l1hdr.b_refcnt, arc_onloan_tag); atomic_add_64(&arc_loaned_bytes, -hdr->b_size); } /* Detach an arc_buf from a dbuf (tag) */ void arc_loan_inuse_buf(arc_buf_t *buf, void *tag) { arc_buf_hdr_t *hdr = buf->b_hdr; ASSERT(buf->b_data != NULL); ASSERT(HDR_HAS_L1HDR(hdr)); (void) refcount_add(&hdr->b_l1hdr.b_refcnt, arc_onloan_tag); (void) refcount_remove(&hdr->b_l1hdr.b_refcnt, tag); buf->b_efunc = NULL; buf->b_private = NULL; atomic_add_64(&arc_loaned_bytes, hdr->b_size); } static arc_buf_t * arc_buf_clone(arc_buf_t *from) { arc_buf_t *buf; arc_buf_hdr_t *hdr = from->b_hdr; uint64_t size = hdr->b_size; ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(hdr->b_l1hdr.b_state != arc_anon); buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); buf->b_hdr = hdr; buf->b_data = NULL; buf->b_efunc = NULL; buf->b_private = NULL; buf->b_next = hdr->b_l1hdr.b_buf; hdr->b_l1hdr.b_buf = buf; arc_get_data_buf(buf); bcopy(from->b_data, buf->b_data, size); /* * This buffer already exists in the arc so create a duplicate * copy for the caller. If the buffer is associated with user data * then track the size and number of duplicates. These stats will be * updated as duplicate buffers are created and destroyed. */ if (HDR_ISTYPE_DATA(hdr)) { ARCSTAT_BUMP(arcstat_duplicate_buffers); ARCSTAT_INCR(arcstat_duplicate_buffers_size, size); } hdr->b_l1hdr.b_datacnt += 1; return (buf); } void arc_buf_add_ref(arc_buf_t *buf, void* tag) { arc_buf_hdr_t *hdr; kmutex_t *hash_lock; /* * Check to see if this buffer is evicted. Callers * must verify b_data != NULL to know if the add_ref * was successful. */ mutex_enter(&buf->b_evict_lock); if (buf->b_data == NULL) { mutex_exit(&buf->b_evict_lock); return; } hash_lock = HDR_LOCK(buf->b_hdr); mutex_enter(hash_lock); hdr = buf->b_hdr; ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); mutex_exit(&buf->b_evict_lock); ASSERT(hdr->b_l1hdr.b_state == arc_mru || hdr->b_l1hdr.b_state == arc_mfu); add_reference(hdr, hash_lock, tag); DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); arc_access(hdr, hash_lock); mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_hits); ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr), demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, hits); } static void arc_buf_free_on_write(void *data, size_t size, void (*free_func)(void *, size_t)) { l2arc_data_free_t *df; df = kmem_alloc(sizeof (*df), KM_SLEEP); df->l2df_data = data; df->l2df_size = size; df->l2df_func = free_func; mutex_enter(&l2arc_free_on_write_mtx); list_insert_head(l2arc_free_on_write, df); mutex_exit(&l2arc_free_on_write_mtx); } /* * Free the arc data buffer. If it is an l2arc write in progress, * the buffer is placed on l2arc_free_on_write to be freed later. */ static void arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t)) { arc_buf_hdr_t *hdr = buf->b_hdr; if (HDR_L2_WRITING(hdr)) { arc_buf_free_on_write(buf->b_data, hdr->b_size, free_func); ARCSTAT_BUMP(arcstat_l2_free_on_write); } else { free_func(buf->b_data, hdr->b_size); } } static void arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr) { size_t align, asize, len; ASSERT(HDR_HAS_L2HDR(hdr)); ASSERT(MUTEX_HELD(&hdr->b_l2hdr.b_dev->l2ad_mtx)); /* * The b_tmp_cdata field is linked off of the b_l1hdr, so if * that doesn't exist, the header is in the arc_l2c_only state, * and there isn't anything to free (it's already been freed). */ if (!HDR_HAS_L1HDR(hdr)) return; /* * The header isn't being written to the l2arc device, thus it * shouldn't have a b_tmp_cdata to free. */ if (!HDR_L2_WRITING(hdr)) { ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL); return; } /* * The bufer has been chosen for writing to L2ARC, but it's * not being written just yet. In other words, * b_tmp_cdata points to exactly the same buffer as b_data, * l2arc_transform_buf hasn't been called. */ if (hdr->b_l2hdr.b_daddr == L2ARC_ADDR_UNSET) { ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, hdr->b_l1hdr.b_buf->b_data); ASSERT3U(hdr->b_l2hdr.b_compress, ==, ZIO_COMPRESS_OFF); hdr->b_l1hdr.b_tmp_cdata = NULL; return; } /* * There's nothing to free since the buffer was all zero's and * compressed to a zero length buffer. */ if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_EMPTY) { ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL); return; } /* * Nothing to do if the temporary buffer was not required. */ if (hdr->b_l1hdr.b_tmp_cdata == NULL) return; ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write); len = hdr->b_size; align = (size_t)1 << hdr->b_l2hdr.b_dev->l2ad_vdev->vdev_ashift; asize = P2ROUNDUP(len, align); arc_buf_free_on_write(hdr->b_l1hdr.b_tmp_cdata, asize, zio_data_buf_free); hdr->b_l1hdr.b_tmp_cdata = NULL; } /* * Free up buf->b_data and if 'remove' is set, then pull the * arc_buf_t off of the the arc_buf_hdr_t's list and free it. */ static void arc_buf_destroy(arc_buf_t *buf, boolean_t remove) { arc_buf_t **bufp; /* free up data associated with the buf */ if (buf->b_data != NULL) { arc_state_t *state = buf->b_hdr->b_l1hdr.b_state; uint64_t size = buf->b_hdr->b_size; arc_buf_contents_t type = arc_buf_type(buf->b_hdr); arc_cksum_verify(buf); #ifdef illumos arc_buf_unwatch(buf); #endif if (type == ARC_BUFC_METADATA) { arc_buf_data_free(buf, zio_buf_free); arc_space_return(size, ARC_SPACE_META); } else { ASSERT(type == ARC_BUFC_DATA); arc_buf_data_free(buf, zio_data_buf_free); arc_space_return(size, ARC_SPACE_DATA); } /* protected by hash lock, if in the hash table */ if (multilist_link_active(&buf->b_hdr->b_l1hdr.b_arc_node)) { uint64_t *cnt = &state->arcs_lsize[type]; ASSERT(refcount_is_zero( &buf->b_hdr->b_l1hdr.b_refcnt)); ASSERT(state != arc_anon && state != arc_l2c_only); ASSERT3U(*cnt, >=, size); atomic_add_64(cnt, -size); } (void) refcount_remove_many(&state->arcs_size, size, buf); buf->b_data = NULL; /* * If we're destroying a duplicate buffer make sure * that the appropriate statistics are updated. */ if (buf->b_hdr->b_l1hdr.b_datacnt > 1 && HDR_ISTYPE_DATA(buf->b_hdr)) { ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers); ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size); } ASSERT(buf->b_hdr->b_l1hdr.b_datacnt > 0); buf->b_hdr->b_l1hdr.b_datacnt -= 1; } /* only remove the buf if requested */ if (!remove) return; /* remove the buf from the hdr list */ for (bufp = &buf->b_hdr->b_l1hdr.b_buf; *bufp != buf; bufp = &(*bufp)->b_next) continue; *bufp = buf->b_next; buf->b_next = NULL; ASSERT(buf->b_efunc == NULL); /* clean up the buf */ buf->b_hdr = NULL; kmem_cache_free(buf_cache, buf); } static void arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr) { l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr; l2arc_dev_t *dev = l2hdr->b_dev; ASSERT(MUTEX_HELD(&dev->l2ad_mtx)); ASSERT(HDR_HAS_L2HDR(hdr)); list_remove(&dev->l2ad_buflist, hdr); /* * We don't want to leak the b_tmp_cdata buffer that was * allocated in l2arc_write_buffers() */ arc_buf_l2_cdata_free(hdr); /* * If the l2hdr's b_daddr is equal to L2ARC_ADDR_UNSET, then * this header is being processed by l2arc_write_buffers() (i.e. * it's in the first stage of l2arc_write_buffers()). * Re-affirming that truth here, just to serve as a reminder. If * b_daddr does not equal L2ARC_ADDR_UNSET, then the header may or * may not have its HDR_L2_WRITING flag set. (the write may have * completed, in which case HDR_L2_WRITING will be false and the * b_daddr field will point to the address of the buffer on disk). */ IMPLY(l2hdr->b_daddr == L2ARC_ADDR_UNSET, HDR_L2_WRITING(hdr)); /* * If b_daddr is equal to L2ARC_ADDR_UNSET, we're racing with * l2arc_write_buffers(). Since we've just removed this header * from the l2arc buffer list, this header will never reach the * second stage of l2arc_write_buffers(), which increments the * accounting stats for this header. Thus, we must be careful * not to decrement them for this header either. */ if (l2hdr->b_daddr != L2ARC_ADDR_UNSET) { ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize); ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size); vdev_space_update(dev->l2ad_vdev, -l2hdr->b_asize, 0, 0); (void) refcount_remove_many(&dev->l2ad_alloc, l2hdr->b_asize, hdr); } hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR; } static void arc_hdr_destroy(arc_buf_hdr_t *hdr) { if (HDR_HAS_L1HDR(hdr)) { ASSERT(hdr->b_l1hdr.b_buf == NULL || hdr->b_l1hdr.b_datacnt > 0); ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); } ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT(!HDR_IN_HASH_TABLE(hdr)); if (HDR_HAS_L2HDR(hdr)) { l2arc_dev_t *dev = hdr->b_l2hdr.b_dev; boolean_t buflist_held = MUTEX_HELD(&dev->l2ad_mtx); if (!buflist_held) mutex_enter(&dev->l2ad_mtx); /* * Even though we checked this conditional above, we * need to check this again now that we have the * l2ad_mtx. This is because we could be racing with * another thread calling l2arc_evict() which might have * destroyed this header's L2 portion as we were waiting * to acquire the l2ad_mtx. If that happens, we don't * want to re-destroy the header's L2 portion. */ if (HDR_HAS_L2HDR(hdr)) { l2arc_trim(hdr); arc_hdr_l2hdr_destroy(hdr); } if (!buflist_held) mutex_exit(&dev->l2ad_mtx); } if (!BUF_EMPTY(hdr)) buf_discard_identity(hdr); if (hdr->b_freeze_cksum != NULL) { kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t)); hdr->b_freeze_cksum = NULL; } if (HDR_HAS_L1HDR(hdr)) { while (hdr->b_l1hdr.b_buf) { arc_buf_t *buf = hdr->b_l1hdr.b_buf; if (buf->b_efunc != NULL) { mutex_enter(&arc_user_evicts_lock); mutex_enter(&buf->b_evict_lock); ASSERT(buf->b_hdr != NULL); arc_buf_destroy(hdr->b_l1hdr.b_buf, FALSE); hdr->b_l1hdr.b_buf = buf->b_next; buf->b_hdr = &arc_eviction_hdr; buf->b_next = arc_eviction_list; arc_eviction_list = buf; mutex_exit(&buf->b_evict_lock); cv_signal(&arc_user_evicts_cv); mutex_exit(&arc_user_evicts_lock); } else { arc_buf_destroy(hdr->b_l1hdr.b_buf, TRUE); } } #ifdef ZFS_DEBUG if (hdr->b_l1hdr.b_thawed != NULL) { kmem_free(hdr->b_l1hdr.b_thawed, 1); hdr->b_l1hdr.b_thawed = NULL; } #endif } ASSERT3P(hdr->b_hash_next, ==, NULL); if (HDR_HAS_L1HDR(hdr)) { ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL); kmem_cache_free(hdr_full_cache, hdr); } else { kmem_cache_free(hdr_l2only_cache, hdr); } } void arc_buf_free(arc_buf_t *buf, void *tag) { arc_buf_hdr_t *hdr = buf->b_hdr; int hashed = hdr->b_l1hdr.b_state != arc_anon; ASSERT(buf->b_efunc == NULL); ASSERT(buf->b_data != NULL); if (hashed) { kmutex_t *hash_lock = HDR_LOCK(hdr); mutex_enter(hash_lock); hdr = buf->b_hdr; ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); (void) remove_reference(hdr, hash_lock, tag); if (hdr->b_l1hdr.b_datacnt > 1) { arc_buf_destroy(buf, TRUE); } else { ASSERT(buf == hdr->b_l1hdr.b_buf); ASSERT(buf->b_efunc == NULL); hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE; } mutex_exit(hash_lock); } else if (HDR_IO_IN_PROGRESS(hdr)) { int destroy_hdr; /* * We are in the middle of an async write. Don't destroy * this buffer unless the write completes before we finish * decrementing the reference count. */ mutex_enter(&arc_user_evicts_lock); (void) remove_reference(hdr, NULL, tag); ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); destroy_hdr = !HDR_IO_IN_PROGRESS(hdr); mutex_exit(&arc_user_evicts_lock); if (destroy_hdr) arc_hdr_destroy(hdr); } else { if (remove_reference(hdr, NULL, tag) > 0) arc_buf_destroy(buf, TRUE); else arc_hdr_destroy(hdr); } } boolean_t arc_buf_remove_ref(arc_buf_t *buf, void* tag) { arc_buf_hdr_t *hdr = buf->b_hdr; kmutex_t *hash_lock = HDR_LOCK(hdr); boolean_t no_callback = (buf->b_efunc == NULL); if (hdr->b_l1hdr.b_state == arc_anon) { ASSERT(hdr->b_l1hdr.b_datacnt == 1); arc_buf_free(buf, tag); return (no_callback); } mutex_enter(hash_lock); hdr = buf->b_hdr; ASSERT(hdr->b_l1hdr.b_datacnt > 0); ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); ASSERT(hdr->b_l1hdr.b_state != arc_anon); ASSERT(buf->b_data != NULL); (void) remove_reference(hdr, hash_lock, tag); if (hdr->b_l1hdr.b_datacnt > 1) { if (no_callback) arc_buf_destroy(buf, TRUE); } else if (no_callback) { ASSERT(hdr->b_l1hdr.b_buf == buf && buf->b_next == NULL); ASSERT(buf->b_efunc == NULL); hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE; } ASSERT(no_callback || hdr->b_l1hdr.b_datacnt > 1 || refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); mutex_exit(hash_lock); return (no_callback); } int32_t arc_buf_size(arc_buf_t *buf) { return (buf->b_hdr->b_size); } /* * Called from the DMU to determine if the current buffer should be * evicted. In order to ensure proper locking, the eviction must be initiated * from the DMU. Return true if the buffer is associated with user data and * duplicate buffers still exist. */ boolean_t arc_buf_eviction_needed(arc_buf_t *buf) { arc_buf_hdr_t *hdr; boolean_t evict_needed = B_FALSE; if (zfs_disable_dup_eviction) return (B_FALSE); mutex_enter(&buf->b_evict_lock); hdr = buf->b_hdr; if (hdr == NULL) { /* * We are in arc_do_user_evicts(); let that function * perform the eviction. */ ASSERT(buf->b_data == NULL); mutex_exit(&buf->b_evict_lock); return (B_FALSE); } else if (buf->b_data == NULL) { /* * We have already been added to the arc eviction list; * recommend eviction. */ ASSERT3P(hdr, ==, &arc_eviction_hdr); mutex_exit(&buf->b_evict_lock); return (B_TRUE); } if (hdr->b_l1hdr.b_datacnt > 1 && HDR_ISTYPE_DATA(hdr)) evict_needed = B_TRUE; mutex_exit(&buf->b_evict_lock); return (evict_needed); } /* * Evict the arc_buf_hdr that is provided as a parameter. The resultant * state of the header is dependent on it's state prior to entering this * function. The following transitions are possible: * * - arc_mru -> arc_mru_ghost * - arc_mfu -> arc_mfu_ghost * - arc_mru_ghost -> arc_l2c_only * - arc_mru_ghost -> deleted * - arc_mfu_ghost -> arc_l2c_only * - arc_mfu_ghost -> deleted */ static int64_t arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) { arc_state_t *evicted_state, *state; int64_t bytes_evicted = 0; ASSERT(MUTEX_HELD(hash_lock)); ASSERT(HDR_HAS_L1HDR(hdr)); state = hdr->b_l1hdr.b_state; if (GHOST_STATE(state)) { ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT(hdr->b_l1hdr.b_buf == NULL); /* * l2arc_write_buffers() relies on a header's L1 portion * (i.e. it's b_tmp_cdata field) during it's write phase. * Thus, we cannot push a header onto the arc_l2c_only * state (removing it's L1 piece) until the header is * done being written to the l2arc. */ if (HDR_HAS_L2HDR(hdr) && HDR_L2_WRITING(hdr)) { ARCSTAT_BUMP(arcstat_evict_l2_skip); return (bytes_evicted); } ARCSTAT_BUMP(arcstat_deleted); bytes_evicted += hdr->b_size; DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr); if (HDR_HAS_L2HDR(hdr)) { /* * This buffer is cached on the 2nd Level ARC; * don't destroy the header. */ arc_change_state(arc_l2c_only, hdr, hash_lock); /* * dropping from L1+L2 cached to L2-only, * realloc to remove the L1 header. */ hdr = arc_hdr_realloc(hdr, hdr_full_cache, hdr_l2only_cache); } else { arc_change_state(arc_anon, hdr, hash_lock); arc_hdr_destroy(hdr); } return (bytes_evicted); } ASSERT(state == arc_mru || state == arc_mfu); evicted_state = (state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost; /* prefetch buffers have a minimum lifespan */ if (HDR_IO_IN_PROGRESS(hdr) || ((hdr->b_flags & (ARC_FLAG_PREFETCH | ARC_FLAG_INDIRECT)) && ddi_get_lbolt() - hdr->b_l1hdr.b_arc_access < arc_min_prefetch_lifespan)) { ARCSTAT_BUMP(arcstat_evict_skip); return (bytes_evicted); } ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt)); ASSERT3U(hdr->b_l1hdr.b_datacnt, >, 0); while (hdr->b_l1hdr.b_buf) { arc_buf_t *buf = hdr->b_l1hdr.b_buf; if (!mutex_tryenter(&buf->b_evict_lock)) { ARCSTAT_BUMP(arcstat_mutex_miss); break; } if (buf->b_data != NULL) bytes_evicted += hdr->b_size; if (buf->b_efunc != NULL) { mutex_enter(&arc_user_evicts_lock); arc_buf_destroy(buf, FALSE); hdr->b_l1hdr.b_buf = buf->b_next; buf->b_hdr = &arc_eviction_hdr; buf->b_next = arc_eviction_list; arc_eviction_list = buf; cv_signal(&arc_user_evicts_cv); mutex_exit(&arc_user_evicts_lock); mutex_exit(&buf->b_evict_lock); } else { mutex_exit(&buf->b_evict_lock); arc_buf_destroy(buf, TRUE); } } if (HDR_HAS_L2HDR(hdr)) { ARCSTAT_INCR(arcstat_evict_l2_cached, hdr->b_size); } else { if (l2arc_write_eligible(hdr->b_spa, hdr)) ARCSTAT_INCR(arcstat_evict_l2_eligible, hdr->b_size); else ARCSTAT_INCR(arcstat_evict_l2_ineligible, hdr->b_size); } if (hdr->b_l1hdr.b_datacnt == 0) { arc_change_state(evicted_state, hdr, hash_lock); ASSERT(HDR_IN_HASH_TABLE(hdr)); hdr->b_flags |= ARC_FLAG_IN_HASH_TABLE; hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE; DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, hdr); } return (bytes_evicted); } static uint64_t arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker, uint64_t spa, int64_t bytes) { multilist_sublist_t *mls; uint64_t bytes_evicted = 0; arc_buf_hdr_t *hdr; kmutex_t *hash_lock; int evict_count = 0; ASSERT3P(marker, !=, NULL); IMPLY(bytes < 0, bytes == ARC_EVICT_ALL); mls = multilist_sublist_lock(ml, idx); for (hdr = multilist_sublist_prev(mls, marker); hdr != NULL; hdr = multilist_sublist_prev(mls, marker)) { if ((bytes != ARC_EVICT_ALL && bytes_evicted >= bytes) || (evict_count >= zfs_arc_evict_batch_limit)) break; /* * To keep our iteration location, move the marker * forward. Since we're not holding hdr's hash lock, we * must be very careful and not remove 'hdr' from the * sublist. Otherwise, other consumers might mistake the * 'hdr' as not being on a sublist when they call the * multilist_link_active() function (they all rely on * the hash lock protecting concurrent insertions and * removals). multilist_sublist_move_forward() was * specifically implemented to ensure this is the case * (only 'marker' will be removed and re-inserted). */ multilist_sublist_move_forward(mls, marker); /* * The only case where the b_spa field should ever be * zero, is the marker headers inserted by * arc_evict_state(). It's possible for multiple threads * to be calling arc_evict_state() concurrently (e.g. * dsl_pool_close() and zio_inject_fault()), so we must * skip any markers we see from these other threads. */ if (hdr->b_spa == 0) continue; /* we're only interested in evicting buffers of a certain spa */ if (spa != 0 && hdr->b_spa != spa) { ARCSTAT_BUMP(arcstat_evict_skip); continue; } hash_lock = HDR_LOCK(hdr); /* * We aren't calling this function from any code path * that would already be holding a hash lock, so we're * asserting on this assumption to be defensive in case * this ever changes. Without this check, it would be * possible to incorrectly increment arcstat_mutex_miss * below (e.g. if the code changed such that we called * this function with a hash lock held). */ ASSERT(!MUTEX_HELD(hash_lock)); if (mutex_tryenter(hash_lock)) { uint64_t evicted = arc_evict_hdr(hdr, hash_lock); mutex_exit(hash_lock); bytes_evicted += evicted; /* * If evicted is zero, arc_evict_hdr() must have * decided to skip this header, don't increment * evict_count in this case. */ if (evicted != 0) evict_count++; /* * If arc_size isn't overflowing, signal any * threads that might happen to be waiting. * * For each header evicted, we wake up a single * thread. If we used cv_broadcast, we could * wake up "too many" threads causing arc_size * to significantly overflow arc_c; since * arc_get_data_buf() doesn't check for overflow * when it's woken up (it doesn't because it's * possible for the ARC to be overflowing while * full of un-evictable buffers, and the * function should proceed in this case). * * If threads are left sleeping, due to not * using cv_broadcast, they will be woken up * just before arc_reclaim_thread() sleeps. */ mutex_enter(&arc_reclaim_lock); if (!arc_is_overflowing()) cv_signal(&arc_reclaim_waiters_cv); mutex_exit(&arc_reclaim_lock); } else { ARCSTAT_BUMP(arcstat_mutex_miss); } } multilist_sublist_unlock(mls); return (bytes_evicted); } /* * Evict buffers from the given arc state, until we've removed the * specified number of bytes. Move the removed buffers to the * appropriate evict state. * * This function makes a "best effort". It skips over any buffers * it can't get a hash_lock on, and so, may not catch all candidates. * It may also return without evicting as much space as requested. * * If bytes is specified using the special value ARC_EVICT_ALL, this * will evict all available (i.e. unlocked and evictable) buffers from * the given arc state; which is used by arc_flush(). */ static uint64_t arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes, arc_buf_contents_t type) { uint64_t total_evicted = 0; multilist_t *ml = &state->arcs_list[type]; int num_sublists; arc_buf_hdr_t **markers; IMPLY(bytes < 0, bytes == ARC_EVICT_ALL); num_sublists = multilist_get_num_sublists(ml); /* * If we've tried to evict from each sublist, made some * progress, but still have not hit the target number of bytes * to evict, we want to keep trying. The markers allow us to * pick up where we left off for each individual sublist, rather * than starting from the tail each time. */ markers = kmem_zalloc(sizeof (*markers) * num_sublists, KM_SLEEP); for (int i = 0; i < num_sublists; i++) { markers[i] = kmem_cache_alloc(hdr_full_cache, KM_SLEEP); /* * A b_spa of 0 is used to indicate that this header is * a marker. This fact is used in arc_adjust_type() and * arc_evict_state_impl(). */ markers[i]->b_spa = 0; multilist_sublist_t *mls = multilist_sublist_lock(ml, i); multilist_sublist_insert_tail(mls, markers[i]); multilist_sublist_unlock(mls); } /* * While we haven't hit our target number of bytes to evict, or * we're evicting all available buffers. */ while (total_evicted < bytes || bytes == ARC_EVICT_ALL) { /* * Start eviction using a randomly selected sublist, * this is to try and evenly balance eviction across all * sublists. Always starting at the same sublist * (e.g. index 0) would cause evictions to favor certain * sublists over others. */ int sublist_idx = multilist_get_random_index(ml); uint64_t scan_evicted = 0; for (int i = 0; i < num_sublists; i++) { uint64_t bytes_remaining; uint64_t bytes_evicted; if (bytes == ARC_EVICT_ALL) bytes_remaining = ARC_EVICT_ALL; else if (total_evicted < bytes) bytes_remaining = bytes - total_evicted; else break; bytes_evicted = arc_evict_state_impl(ml, sublist_idx, markers[sublist_idx], spa, bytes_remaining); scan_evicted += bytes_evicted; total_evicted += bytes_evicted; /* we've reached the end, wrap to the beginning */ if (++sublist_idx >= num_sublists) sublist_idx = 0; } /* * If we didn't evict anything during this scan, we have * no reason to believe we'll evict more during another * scan, so break the loop. */ if (scan_evicted == 0) { /* This isn't possible, let's make that obvious */ ASSERT3S(bytes, !=, 0); /* * When bytes is ARC_EVICT_ALL, the only way to * break the loop is when scan_evicted is zero. * In that case, we actually have evicted enough, * so we don't want to increment the kstat. */ if (bytes != ARC_EVICT_ALL) { ASSERT3S(total_evicted, <, bytes); ARCSTAT_BUMP(arcstat_evict_not_enough); } break; } } for (int i = 0; i < num_sublists; i++) { multilist_sublist_t *mls = multilist_sublist_lock(ml, i); multilist_sublist_remove(mls, markers[i]); multilist_sublist_unlock(mls); kmem_cache_free(hdr_full_cache, markers[i]); } kmem_free(markers, sizeof (*markers) * num_sublists); return (total_evicted); } /* * Flush all "evictable" data of the given type from the arc state * specified. This will not evict any "active" buffers (i.e. referenced). * * When 'retry' is set to FALSE, the function will make a single pass * over the state and evict any buffers that it can. Since it doesn't * continually retry the eviction, it might end up leaving some buffers * in the ARC due to lock misses. * * When 'retry' is set to TRUE, the function will continually retry the * eviction until *all* evictable buffers have been removed from the * state. As a result, if concurrent insertions into the state are * allowed (e.g. if the ARC isn't shutting down), this function might * wind up in an infinite loop, continually trying to evict buffers. */ static uint64_t arc_flush_state(arc_state_t *state, uint64_t spa, arc_buf_contents_t type, boolean_t retry) { uint64_t evicted = 0; while (state->arcs_lsize[type] != 0) { evicted += arc_evict_state(state, spa, ARC_EVICT_ALL, type); if (!retry) break; } return (evicted); } /* * Evict the specified number of bytes from the state specified, * restricting eviction to the spa and type given. This function * prevents us from trying to evict more from a state's list than * is "evictable", and to skip evicting altogether when passed a * negative value for "bytes". In contrast, arc_evict_state() will * evict everything it can, when passed a negative value for "bytes". */ static uint64_t arc_adjust_impl(arc_state_t *state, uint64_t spa, int64_t bytes, arc_buf_contents_t type) { int64_t delta; if (bytes > 0 && state->arcs_lsize[type] > 0) { delta = MIN(state->arcs_lsize[type], bytes); return (arc_evict_state(state, spa, delta, type)); } return (0); } /* * Evict metadata buffers from the cache, such that arc_meta_used is * capped by the arc_meta_limit tunable. */ static uint64_t arc_adjust_meta(void) { uint64_t total_evicted = 0; int64_t target; /* * If we're over the meta limit, we want to evict enough * metadata to get back under the meta limit. We don't want to * evict so much that we drop the MRU below arc_p, though. If * we're over the meta limit more than we're over arc_p, we * evict some from the MRU here, and some from the MFU below. */ target = MIN((int64_t)(arc_meta_used - arc_meta_limit), (int64_t)(refcount_count(&arc_anon->arcs_size) + refcount_count(&arc_mru->arcs_size) - arc_p)); total_evicted += arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA); /* * Similar to the above, we want to evict enough bytes to get us * below the meta limit, but not so much as to drop us below the * space alloted to the MFU (which is defined as arc_c - arc_p). */ target = MIN((int64_t)(arc_meta_used - arc_meta_limit), (int64_t)(refcount_count(&arc_mfu->arcs_size) - (arc_c - arc_p))); total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA); return (total_evicted); } /* * Return the type of the oldest buffer in the given arc state * * This function will select a random sublist of type ARC_BUFC_DATA and * a random sublist of type ARC_BUFC_METADATA. The tail of each sublist * is compared, and the type which contains the "older" buffer will be * returned. */ static arc_buf_contents_t arc_adjust_type(arc_state_t *state) { multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA]; multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA]; int data_idx = multilist_get_random_index(data_ml); int meta_idx = multilist_get_random_index(meta_ml); multilist_sublist_t *data_mls; multilist_sublist_t *meta_mls; arc_buf_contents_t type; arc_buf_hdr_t *data_hdr; arc_buf_hdr_t *meta_hdr; /* * We keep the sublist lock until we're finished, to prevent * the headers from being destroyed via arc_evict_state(). */ data_mls = multilist_sublist_lock(data_ml, data_idx); meta_mls = multilist_sublist_lock(meta_ml, meta_idx); /* * These two loops are to ensure we skip any markers that * might be at the tail of the lists due to arc_evict_state(). */ for (data_hdr = multilist_sublist_tail(data_mls); data_hdr != NULL; data_hdr = multilist_sublist_prev(data_mls, data_hdr)) { if (data_hdr->b_spa != 0) break; } for (meta_hdr = multilist_sublist_tail(meta_mls); meta_hdr != NULL; meta_hdr = multilist_sublist_prev(meta_mls, meta_hdr)) { if (meta_hdr->b_spa != 0) break; } if (data_hdr == NULL && meta_hdr == NULL) { type = ARC_BUFC_DATA; } else if (data_hdr == NULL) { ASSERT3P(meta_hdr, !=, NULL); type = ARC_BUFC_METADATA; } else if (meta_hdr == NULL) { ASSERT3P(data_hdr, !=, NULL); type = ARC_BUFC_DATA; } else { ASSERT3P(data_hdr, !=, NULL); ASSERT3P(meta_hdr, !=, NULL); /* The headers can't be on the sublist without an L1 header */ ASSERT(HDR_HAS_L1HDR(data_hdr)); ASSERT(HDR_HAS_L1HDR(meta_hdr)); if (data_hdr->b_l1hdr.b_arc_access < meta_hdr->b_l1hdr.b_arc_access) { type = ARC_BUFC_DATA; } else { type = ARC_BUFC_METADATA; } } multilist_sublist_unlock(meta_mls); multilist_sublist_unlock(data_mls); return (type); } /* * Evict buffers from the cache, such that arc_size is capped by arc_c. */ static uint64_t arc_adjust(void) { uint64_t total_evicted = 0; uint64_t bytes; int64_t target; /* * If we're over arc_meta_limit, we want to correct that before * potentially evicting data buffers below. */ total_evicted += arc_adjust_meta(); /* * Adjust MRU size * * If we're over the target cache size, we want to evict enough * from the list to get back to our target size. We don't want * to evict too much from the MRU, such that it drops below * arc_p. So, if we're over our target cache size more than * the MRU is over arc_p, we'll evict enough to get back to * arc_p here, and then evict more from the MFU below. */ target = MIN((int64_t)(arc_size - arc_c), (int64_t)(refcount_count(&arc_anon->arcs_size) + refcount_count(&arc_mru->arcs_size) + arc_meta_used - arc_p)); /* * If we're below arc_meta_min, always prefer to evict data. * Otherwise, try to satisfy the requested number of bytes to * evict from the type which contains older buffers; in an * effort to keep newer buffers in the cache regardless of their * type. If we cannot satisfy the number of bytes from this * type, spill over into the next type. */ if (arc_adjust_type(arc_mru) == ARC_BUFC_METADATA && arc_meta_used > arc_meta_min) { bytes = arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA); total_evicted += bytes; /* * If we couldn't evict our target number of bytes from * metadata, we try to get the rest from data. */ target -= bytes; total_evicted += arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_DATA); } else { bytes = arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_DATA); total_evicted += bytes; /* * If we couldn't evict our target number of bytes from * data, we try to get the rest from metadata. */ target -= bytes; total_evicted += arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA); } /* * Adjust MFU size * * Now that we've tried to evict enough from the MRU to get its * size back to arc_p, if we're still above the target cache * size, we evict the rest from the MFU. */ target = arc_size - arc_c; if (arc_adjust_type(arc_mfu) == ARC_BUFC_METADATA && arc_meta_used > arc_meta_min) { bytes = arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA); total_evicted += bytes; /* * If we couldn't evict our target number of bytes from * metadata, we try to get the rest from data. */ target -= bytes; total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_DATA); } else { bytes = arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_DATA); total_evicted += bytes; /* * If we couldn't evict our target number of bytes from * data, we try to get the rest from data. */ target -= bytes; total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA); } /* * Adjust ghost lists * * In addition to the above, the ARC also defines target values * for the ghost lists. The sum of the mru list and mru ghost * list should never exceed the target size of the cache, and * the sum of the mru list, mfu list, mru ghost list, and mfu * ghost list should never exceed twice the target size of the * cache. The following logic enforces these limits on the ghost * caches, and evicts from them as needed. */ target = refcount_count(&arc_mru->arcs_size) + refcount_count(&arc_mru_ghost->arcs_size) - arc_c; bytes = arc_adjust_impl(arc_mru_ghost, 0, target, ARC_BUFC_DATA); total_evicted += bytes; target -= bytes; total_evicted += arc_adjust_impl(arc_mru_ghost, 0, target, ARC_BUFC_METADATA); /* * We assume the sum of the mru list and mfu list is less than * or equal to arc_c (we enforced this above), which means we * can use the simpler of the two equations below: * * mru + mfu + mru ghost + mfu ghost <= 2 * arc_c * mru ghost + mfu ghost <= arc_c */ target = refcount_count(&arc_mru_ghost->arcs_size) + refcount_count(&arc_mfu_ghost->arcs_size) - arc_c; bytes = arc_adjust_impl(arc_mfu_ghost, 0, target, ARC_BUFC_DATA); total_evicted += bytes; target -= bytes; total_evicted += arc_adjust_impl(arc_mfu_ghost, 0, target, ARC_BUFC_METADATA); return (total_evicted); } static void arc_do_user_evicts(void) { mutex_enter(&arc_user_evicts_lock); while (arc_eviction_list != NULL) { arc_buf_t *buf = arc_eviction_list; arc_eviction_list = buf->b_next; mutex_enter(&buf->b_evict_lock); buf->b_hdr = NULL; mutex_exit(&buf->b_evict_lock); mutex_exit(&arc_user_evicts_lock); if (buf->b_efunc != NULL) VERIFY0(buf->b_efunc(buf->b_private)); buf->b_efunc = NULL; buf->b_private = NULL; kmem_cache_free(buf_cache, buf); mutex_enter(&arc_user_evicts_lock); } mutex_exit(&arc_user_evicts_lock); } void arc_flush(spa_t *spa, boolean_t retry) { uint64_t guid = 0; /* * If retry is TRUE, a spa must not be specified since we have * no good way to determine if all of a spa's buffers have been * evicted from an arc state. */ ASSERT(!retry || spa == 0); if (spa != NULL) guid = spa_load_guid(spa); (void) arc_flush_state(arc_mru, guid, ARC_BUFC_DATA, retry); (void) arc_flush_state(arc_mru, guid, ARC_BUFC_METADATA, retry); (void) arc_flush_state(arc_mfu, guid, ARC_BUFC_DATA, retry); (void) arc_flush_state(arc_mfu, guid, ARC_BUFC_METADATA, retry); (void) arc_flush_state(arc_mru_ghost, guid, ARC_BUFC_DATA, retry); (void) arc_flush_state(arc_mru_ghost, guid, ARC_BUFC_METADATA, retry); (void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_DATA, retry); (void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_METADATA, retry); arc_do_user_evicts(); ASSERT(spa || arc_eviction_list == NULL); } void arc_shrink(int64_t to_free) { if (arc_c > arc_c_min) { DTRACE_PROBE4(arc__shrink, uint64_t, arc_c, uint64_t, arc_c_min, uint64_t, arc_p, uint64_t, to_free); if (arc_c > arc_c_min + to_free) atomic_add_64(&arc_c, -to_free); else arc_c = arc_c_min; atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift)); if (arc_c > arc_size) arc_c = MAX(arc_size, arc_c_min); if (arc_p > arc_c) arc_p = (arc_c >> 1); DTRACE_PROBE2(arc__shrunk, uint64_t, arc_c, uint64_t, arc_p); ASSERT(arc_c >= arc_c_min); ASSERT((int64_t)arc_p >= 0); } if (arc_size > arc_c) { DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size, uint64_t, arc_c); (void) arc_adjust(); } } static long needfree = 0; typedef enum free_memory_reason_t { FMR_UNKNOWN, FMR_NEEDFREE, FMR_LOTSFREE, FMR_SWAPFS_MINFREE, FMR_PAGES_PP_MAXIMUM, FMR_HEAP_ARENA, FMR_ZIO_ARENA, FMR_ZIO_FRAG, } free_memory_reason_t; int64_t last_free_memory; free_memory_reason_t last_free_reason; /* * Additional reserve of pages for pp_reserve. */ int64_t arc_pages_pp_reserve = 64; /* * Additional reserve of pages for swapfs. */ int64_t arc_swapfs_reserve = 64; /* * Return the amount of memory that can be consumed before reclaim will be * needed. Positive if there is sufficient free memory, negative indicates * the amount of memory that needs to be freed up. */ static int64_t arc_available_memory(void) { int64_t lowest = INT64_MAX; int64_t n; free_memory_reason_t r = FMR_UNKNOWN; #ifdef _KERNEL if (needfree > 0) { n = PAGESIZE * (-needfree); if (n < lowest) { lowest = n; r = FMR_NEEDFREE; } } /* * Cooperate with pagedaemon when it's time for it to scan * and reclaim some pages. */ n = PAGESIZE * ((int64_t)freemem - zfs_arc_free_target); if (n < lowest) { lowest = n; r = FMR_LOTSFREE; } #ifdef illumos /* * check that we're out of range of the pageout scanner. It starts to * schedule paging if freemem is less than lotsfree and needfree. * lotsfree is the high-water mark for pageout, and needfree is the * number of needed free pages. We add extra pages here to make sure * the scanner doesn't start up while we're freeing memory. */ n = PAGESIZE * (freemem - lotsfree - needfree - desfree); if (n < lowest) { lowest = n; r = FMR_LOTSFREE; } /* * check to make sure that swapfs has enough space so that anon * reservations can still succeed. anon_resvmem() checks that the * availrmem is greater than swapfs_minfree, and the number of reserved * swap pages. We also add a bit of extra here just to prevent * circumstances from getting really dire. */ n = PAGESIZE * (availrmem - swapfs_minfree - swapfs_reserve - desfree - arc_swapfs_reserve); if (n < lowest) { lowest = n; r = FMR_SWAPFS_MINFREE; } /* * Check that we have enough availrmem that memory locking (e.g., via * mlock(3C) or memcntl(2)) can still succeed. (pages_pp_maximum * stores the number of pages that cannot be locked; when availrmem * drops below pages_pp_maximum, page locking mechanisms such as * page_pp_lock() will fail.) */ n = PAGESIZE * (availrmem - pages_pp_maximum - arc_pages_pp_reserve); if (n < lowest) { lowest = n; r = FMR_PAGES_PP_MAXIMUM; } #endif /* illumos */ #if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) /* * If we're on an i386 platform, it's possible that we'll exhaust the * kernel heap space before we ever run out of available physical * memory. Most checks of the size of the heap_area compare against * tune.t_minarmem, which is the minimum available real memory that we * can have in the system. However, this is generally fixed at 25 pages * which is so low that it's useless. In this comparison, we seek to * calculate the total heap-size, and reclaim if more than 3/4ths of the * heap is allocated. (Or, in the calculation, if less than 1/4th is * free) */ n = (int64_t)vmem_size(heap_arena, VMEM_FREE) - (vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2); if (n < lowest) { lowest = n; r = FMR_HEAP_ARENA; } #define zio_arena NULL #else #define zio_arena heap_arena #endif /* * If zio data pages are being allocated out of a separate heap segment, * then enforce that the size of available vmem for this arena remains * above about 1/16th free. * * Note: The 1/16th arena free requirement was put in place * to aggressively evict memory from the arc in order to avoid * memory fragmentation issues. */ if (zio_arena != NULL) { n = (int64_t)vmem_size(zio_arena, VMEM_FREE) - (vmem_size(zio_arena, VMEM_ALLOC) >> 4); if (n < lowest) { lowest = n; r = FMR_ZIO_ARENA; } } /* * Above limits know nothing about real level of KVA fragmentation. * Start aggressive reclamation if too little sequential KVA left. */ if (lowest > 0) { n = (vmem_size(heap_arena, VMEM_MAXFREE) < zfs_max_recordsize) ? -((int64_t)vmem_size(heap_arena, VMEM_ALLOC) >> 4) : INT64_MAX; if (n < lowest) { lowest = n; r = FMR_ZIO_FRAG; } } #else /* _KERNEL */ /* Every 100 calls, free a small amount */ if (spa_get_random(100) == 0) lowest = -1024; #endif /* _KERNEL */ last_free_memory = lowest; last_free_reason = r; DTRACE_PROBE2(arc__available_memory, int64_t, lowest, int, r); return (lowest); } /* * Determine if the system is under memory pressure and is asking * to reclaim memory. A return value of TRUE indicates that the system * is under memory pressure and that the arc should adjust accordingly. */ static boolean_t arc_reclaim_needed(void) { return (arc_available_memory() < 0); } extern kmem_cache_t *zio_buf_cache[]; extern kmem_cache_t *zio_data_buf_cache[]; extern kmem_cache_t *range_seg_cache; static __noinline void arc_kmem_reap_now(void) { size_t i; kmem_cache_t *prev_cache = NULL; kmem_cache_t *prev_data_cache = NULL; DTRACE_PROBE(arc__kmem_reap_start); #ifdef _KERNEL if (arc_meta_used >= arc_meta_limit) { /* * We are exceeding our meta-data cache limit. * Purge some DNLC entries to release holds on meta-data. */ dnlc_reduce_cache((void *)(uintptr_t)arc_reduce_dnlc_percent); } #if defined(__i386) /* * Reclaim unused memory from all kmem caches. */ kmem_reap(); #endif #endif for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) { if (zio_buf_cache[i] != prev_cache) { prev_cache = zio_buf_cache[i]; kmem_cache_reap_now(zio_buf_cache[i]); } if (zio_data_buf_cache[i] != prev_data_cache) { prev_data_cache = zio_data_buf_cache[i]; kmem_cache_reap_now(zio_data_buf_cache[i]); } } kmem_cache_reap_now(buf_cache); kmem_cache_reap_now(hdr_full_cache); kmem_cache_reap_now(hdr_l2only_cache); kmem_cache_reap_now(range_seg_cache); #ifdef illumos if (zio_arena != NULL) { /* * Ask the vmem arena to reclaim unused memory from its * quantum caches. */ vmem_qcache_reap(zio_arena); } #endif DTRACE_PROBE(arc__kmem_reap_end); } /* * Threads can block in arc_get_data_buf() waiting for this thread to evict * enough data and signal them to proceed. When this happens, the threads in * arc_get_data_buf() are sleeping while holding the hash lock for their * particular arc header. Thus, we must be careful to never sleep on a * hash lock in this thread. This is to prevent the following deadlock: * * - Thread A sleeps on CV in arc_get_data_buf() holding hash lock "L", * waiting for the reclaim thread to signal it. * * - arc_reclaim_thread() tries to acquire hash lock "L" using mutex_enter, * fails, and goes to sleep forever. * * This possible deadlock is avoided by always acquiring a hash lock * using mutex_tryenter() from arc_reclaim_thread(). */ static void arc_reclaim_thread(void *dummy __unused) { hrtime_t growtime = 0; callb_cpr_t cpr; CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG); mutex_enter(&arc_reclaim_lock); while (!arc_reclaim_thread_exit) { int64_t free_memory = arc_available_memory(); uint64_t evicted = 0; mutex_exit(&arc_reclaim_lock); if (free_memory < 0) { arc_no_grow = B_TRUE; arc_warm = B_TRUE; /* * Wait at least zfs_grow_retry (default 60) seconds * before considering growing. */ growtime = gethrtime() + SEC2NSEC(arc_grow_retry); arc_kmem_reap_now(); /* * If we are still low on memory, shrink the ARC * so that we have arc_shrink_min free space. */ free_memory = arc_available_memory(); int64_t to_free = (arc_c >> arc_shrink_shift) - free_memory; if (to_free > 0) { #ifdef _KERNEL to_free = MAX(to_free, ptob(needfree)); #endif arc_shrink(to_free); } } else if (free_memory < arc_c >> arc_no_grow_shift) { arc_no_grow = B_TRUE; } else if (gethrtime() >= growtime) { arc_no_grow = B_FALSE; } evicted = arc_adjust(); mutex_enter(&arc_reclaim_lock); /* * If evicted is zero, we couldn't evict anything via * arc_adjust(). This could be due to hash lock * collisions, but more likely due to the majority of * arc buffers being unevictable. Therefore, even if * arc_size is above arc_c, another pass is unlikely to * be helpful and could potentially cause us to enter an * infinite loop. */ if (arc_size <= arc_c || evicted == 0) { #ifdef _KERNEL needfree = 0; #endif /* * We're either no longer overflowing, or we * can't evict anything more, so we should wake * up any threads before we go to sleep. */ cv_broadcast(&arc_reclaim_waiters_cv); /* * Block until signaled, or after one second (we * might need to perform arc_kmem_reap_now() * even if we aren't being signalled) */ CALLB_CPR_SAFE_BEGIN(&cpr); (void) cv_timedwait_hires(&arc_reclaim_thread_cv, &arc_reclaim_lock, SEC2NSEC(1), MSEC2NSEC(1), 0); CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_lock); } } arc_reclaim_thread_exit = FALSE; cv_broadcast(&arc_reclaim_thread_cv); CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_lock */ thread_exit(); } static void arc_user_evicts_thread(void *dummy __unused) { callb_cpr_t cpr; CALLB_CPR_INIT(&cpr, &arc_user_evicts_lock, callb_generic_cpr, FTAG); mutex_enter(&arc_user_evicts_lock); while (!arc_user_evicts_thread_exit) { mutex_exit(&arc_user_evicts_lock); arc_do_user_evicts(); /* * This is necessary in order for the mdb ::arc dcmd to * show up to date information. Since the ::arc command * does not call the kstat's update function, without * this call, the command may show stale stats for the * anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even * with this change, the data might be up to 1 second * out of date; but that should suffice. The arc_state_t * structures can be queried directly if more accurate * information is needed. */ if (arc_ksp != NULL) arc_ksp->ks_update(arc_ksp, KSTAT_READ); mutex_enter(&arc_user_evicts_lock); /* * Block until signaled, or after one second (we need to * call the arc's kstat update function regularly). */ CALLB_CPR_SAFE_BEGIN(&cpr); (void) cv_timedwait(&arc_user_evicts_cv, &arc_user_evicts_lock, hz); CALLB_CPR_SAFE_END(&cpr, &arc_user_evicts_lock); } arc_user_evicts_thread_exit = FALSE; cv_broadcast(&arc_user_evicts_cv); CALLB_CPR_EXIT(&cpr); /* drops arc_user_evicts_lock */ thread_exit(); } static u_int arc_dnlc_evicts_arg; extern struct vfsops zfs_vfsops; static void arc_dnlc_evicts_thread(void *dummy __unused) { callb_cpr_t cpr; u_int percent; CALLB_CPR_INIT(&cpr, &arc_dnlc_evicts_lock, callb_generic_cpr, FTAG); mutex_enter(&arc_dnlc_evicts_lock); while (!arc_dnlc_evicts_thread_exit) { CALLB_CPR_SAFE_BEGIN(&cpr); (void) cv_wait(&arc_dnlc_evicts_cv, &arc_dnlc_evicts_lock); CALLB_CPR_SAFE_END(&cpr, &arc_dnlc_evicts_lock); if (arc_dnlc_evicts_arg != 0) { percent = arc_dnlc_evicts_arg; mutex_exit(&arc_dnlc_evicts_lock); #ifdef _KERNEL vnlru_free(desiredvnodes * percent / 100, &zfs_vfsops); #endif mutex_enter(&arc_dnlc_evicts_lock); /* * Clear our token only after vnlru_free() * pass is done, to avoid false queueing of * the requests. */ arc_dnlc_evicts_arg = 0; } } arc_dnlc_evicts_thread_exit = FALSE; cv_broadcast(&arc_dnlc_evicts_cv); CALLB_CPR_EXIT(&cpr); thread_exit(); } void dnlc_reduce_cache(void *arg) { u_int percent; percent = (u_int)(uintptr_t)arg; mutex_enter(&arc_dnlc_evicts_lock); if (arc_dnlc_evicts_arg == 0) { arc_dnlc_evicts_arg = percent; cv_broadcast(&arc_dnlc_evicts_cv); } mutex_exit(&arc_dnlc_evicts_lock); } /* * Adapt arc info given the number of bytes we are trying to add and * the state that we are comming from. This function is only called * when we are adding new content to the cache. */ static void arc_adapt(int bytes, arc_state_t *state) { int mult; uint64_t arc_p_min = (arc_c >> arc_p_min_shift); int64_t mrug_size = refcount_count(&arc_mru_ghost->arcs_size); int64_t mfug_size = refcount_count(&arc_mfu_ghost->arcs_size); if (state == arc_l2c_only) return; ASSERT(bytes > 0); /* * Adapt the target size of the MRU list: * - if we just hit in the MRU ghost list, then increase * the target size of the MRU list. * - if we just hit in the MFU ghost list, then increase * the target size of the MFU list by decreasing the * target size of the MRU list. */ if (state == arc_mru_ghost) { mult = (mrug_size >= mfug_size) ? 1 : (mfug_size / mrug_size); mult = MIN(mult, 10); /* avoid wild arc_p adjustment */ arc_p = MIN(arc_c - arc_p_min, arc_p + bytes * mult); } else if (state == arc_mfu_ghost) { uint64_t delta; mult = (mfug_size >= mrug_size) ? 1 : (mrug_size / mfug_size); mult = MIN(mult, 10); delta = MIN(bytes * mult, arc_p); arc_p = MAX(arc_p_min, arc_p - delta); } ASSERT((int64_t)arc_p >= 0); if (arc_reclaim_needed()) { cv_signal(&arc_reclaim_thread_cv); return; } if (arc_no_grow) return; if (arc_c >= arc_c_max) return; /* * If we're within (2 * maxblocksize) bytes of the target * cache size, increment the target cache size */ if (arc_size > arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) { DTRACE_PROBE1(arc__inc_adapt, int, bytes); atomic_add_64(&arc_c, (int64_t)bytes); if (arc_c > arc_c_max) arc_c = arc_c_max; else if (state == arc_anon) atomic_add_64(&arc_p, (int64_t)bytes); if (arc_p > arc_c) arc_p = arc_c; } ASSERT((int64_t)arc_p >= 0); } /* * Check if arc_size has grown past our upper threshold, determined by * zfs_arc_overflow_shift. */ static boolean_t arc_is_overflowing(void) { /* Always allow at least one block of overflow */ uint64_t overflow = MAX(SPA_MAXBLOCKSIZE, arc_c >> zfs_arc_overflow_shift); return (arc_size >= arc_c + overflow); } /* * The buffer, supplied as the first argument, needs a data block. If we * are hitting the hard limit for the cache size, we must sleep, waiting * for the eviction thread to catch up. If we're past the target size * but below the hard limit, we'll only signal the reclaim thread and * continue on. */ static void arc_get_data_buf(arc_buf_t *buf) { arc_state_t *state = buf->b_hdr->b_l1hdr.b_state; uint64_t size = buf->b_hdr->b_size; arc_buf_contents_t type = arc_buf_type(buf->b_hdr); arc_adapt(size, state); /* * If arc_size is currently overflowing, and has grown past our * upper limit, we must be adding data faster than the evict * thread can evict. Thus, to ensure we don't compound the * problem by adding more data and forcing arc_size to grow even * further past it's target size, we halt and wait for the * eviction thread to catch up. * * It's also possible that the reclaim thread is unable to evict * enough buffers to get arc_size below the overflow limit (e.g. * due to buffers being un-evictable, or hash lock collisions). * In this case, we want to proceed regardless if we're * overflowing; thus we don't use a while loop here. */ if (arc_is_overflowing()) { mutex_enter(&arc_reclaim_lock); /* * Now that we've acquired the lock, we may no longer be * over the overflow limit, lets check. * * We're ignoring the case of spurious wake ups. If that * were to happen, it'd let this thread consume an ARC * buffer before it should have (i.e. before we're under * the overflow limit and were signalled by the reclaim * thread). As long as that is a rare occurrence, it * shouldn't cause any harm. */ if (arc_is_overflowing()) { cv_signal(&arc_reclaim_thread_cv); cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock); } mutex_exit(&arc_reclaim_lock); } if (type == ARC_BUFC_METADATA) { buf->b_data = zio_buf_alloc(size); arc_space_consume(size, ARC_SPACE_META); } else { ASSERT(type == ARC_BUFC_DATA); buf->b_data = zio_data_buf_alloc(size); arc_space_consume(size, ARC_SPACE_DATA); } /* * Update the state size. Note that ghost states have a * "ghost size" and so don't need to be updated. */ if (!GHOST_STATE(buf->b_hdr->b_l1hdr.b_state)) { arc_buf_hdr_t *hdr = buf->b_hdr; arc_state_t *state = hdr->b_l1hdr.b_state; (void) refcount_add_many(&state->arcs_size, size, buf); /* * If this is reached via arc_read, the link is * protected by the hash lock. If reached via * arc_buf_alloc, the header should not be accessed by * any other thread. And, if reached via arc_read_done, * the hash lock will protect it if it's found in the * hash table; otherwise no other thread should be * trying to [add|remove]_reference it. */ if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) { ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); atomic_add_64(&hdr->b_l1hdr.b_state->arcs_lsize[type], size); } /* * If we are growing the cache, and we are adding anonymous * data, and we have outgrown arc_p, update arc_p */ if (arc_size < arc_c && hdr->b_l1hdr.b_state == arc_anon && (refcount_count(&arc_anon->arcs_size) + refcount_count(&arc_mru->arcs_size) > arc_p)) arc_p = MIN(arc_c, arc_p + size); } ARCSTAT_BUMP(arcstat_allocated); } /* * This routine is called whenever a buffer is accessed. * NOTE: the hash lock is dropped in this function. */ static void arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock) { clock_t now; ASSERT(MUTEX_HELD(hash_lock)); ASSERT(HDR_HAS_L1HDR(hdr)); if (hdr->b_l1hdr.b_state == arc_anon) { /* * This buffer is not in the cache, and does not * appear in our "ghost" list. Add the new buffer * to the MRU state. */ ASSERT0(hdr->b_l1hdr.b_arc_access); hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr); arc_change_state(arc_mru, hdr, hash_lock); } else if (hdr->b_l1hdr.b_state == arc_mru) { now = ddi_get_lbolt(); /* * If this buffer is here because of a prefetch, then either: * - clear the flag if this is a "referencing" read * (any subsequent access will bump this into the MFU state). * or * - move the buffer to the head of the list if this is * another prefetch (to make it less likely to be evicted). */ if (HDR_PREFETCH(hdr)) { if (refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) { /* link protected by hash lock */ ASSERT(multilist_link_active( &hdr->b_l1hdr.b_arc_node)); } else { hdr->b_flags &= ~ARC_FLAG_PREFETCH; ARCSTAT_BUMP(arcstat_mru_hits); } hdr->b_l1hdr.b_arc_access = now; return; } /* * This buffer has been "accessed" only once so far, * but it is still in the cache. Move it to the MFU * state. */ if (now > hdr->b_l1hdr.b_arc_access + ARC_MINTIME) { /* * More than 125ms have passed since we * instantiated this buffer. Move it to the * most frequently used state. */ hdr->b_l1hdr.b_arc_access = now; DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); arc_change_state(arc_mfu, hdr, hash_lock); } ARCSTAT_BUMP(arcstat_mru_hits); } else if (hdr->b_l1hdr.b_state == arc_mru_ghost) { arc_state_t *new_state; /* * This buffer has been "accessed" recently, but * was evicted from the cache. Move it to the * MFU state. */ if (HDR_PREFETCH(hdr)) { new_state = arc_mru; if (refcount_count(&hdr->b_l1hdr.b_refcnt) > 0) hdr->b_flags &= ~ARC_FLAG_PREFETCH; DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr); } else { new_state = arc_mfu; DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); } hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); arc_change_state(new_state, hdr, hash_lock); ARCSTAT_BUMP(arcstat_mru_ghost_hits); } else if (hdr->b_l1hdr.b_state == arc_mfu) { /* * This buffer has been accessed more than once and is * still in the cache. Keep it in the MFU state. * * NOTE: an add_reference() that occurred when we did * the arc_read() will have kicked this off the list. * If it was a prefetch, we will explicitly move it to * the head of the list now. */ if ((HDR_PREFETCH(hdr)) != 0) { ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); /* link protected by hash_lock */ ASSERT(multilist_link_active(&hdr->b_l1hdr.b_arc_node)); } ARCSTAT_BUMP(arcstat_mfu_hits); hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); } else if (hdr->b_l1hdr.b_state == arc_mfu_ghost) { arc_state_t *new_state = arc_mfu; /* * This buffer has been accessed more than once but has * been evicted from the cache. Move it back to the * MFU state. */ if (HDR_PREFETCH(hdr)) { /* * This is a prefetch access... * move this block back to the MRU state. */ ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt)); new_state = arc_mru; } hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); arc_change_state(new_state, hdr, hash_lock); ARCSTAT_BUMP(arcstat_mfu_ghost_hits); } else if (hdr->b_l1hdr.b_state == arc_l2c_only) { /* * This buffer is on the 2nd Level ARC. */ hdr->b_l1hdr.b_arc_access = ddi_get_lbolt(); DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, hdr); arc_change_state(arc_mfu, hdr, hash_lock); } else { ASSERT(!"invalid arc state"); } } /* a generic arc_done_func_t which you can use */ /* ARGSUSED */ void arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg) { if (zio == NULL || zio->io_error == 0) bcopy(buf->b_data, arg, buf->b_hdr->b_size); VERIFY(arc_buf_remove_ref(buf, arg)); } /* a generic arc_done_func_t */ void arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg) { arc_buf_t **bufp = arg; if (zio && zio->io_error) { VERIFY(arc_buf_remove_ref(buf, arg)); *bufp = NULL; } else { *bufp = buf; ASSERT(buf->b_data); } } static void arc_read_done(zio_t *zio) { arc_buf_hdr_t *hdr; arc_buf_t *buf; arc_buf_t *abuf; /* buffer we're assigning to callback */ kmutex_t *hash_lock = NULL; arc_callback_t *callback_list, *acb; int freeable = FALSE; buf = zio->io_private; hdr = buf->b_hdr; /* * The hdr was inserted into hash-table and removed from lists * prior to starting I/O. We should find this header, since * it's in the hash table, and it should be legit since it's * not possible to evict it during the I/O. The only possible * reason for it not to be found is if we were freed during the * read. */ if (HDR_IN_HASH_TABLE(hdr)) { ASSERT3U(hdr->b_birth, ==, BP_PHYSICAL_BIRTH(zio->io_bp)); ASSERT3U(hdr->b_dva.dva_word[0], ==, BP_IDENTITY(zio->io_bp)->dva_word[0]); ASSERT3U(hdr->b_dva.dva_word[1], ==, BP_IDENTITY(zio->io_bp)->dva_word[1]); arc_buf_hdr_t *found = buf_hash_find(hdr->b_spa, zio->io_bp, &hash_lock); ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) && hash_lock == NULL) || (found == hdr && DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))) || (found == hdr && HDR_L2_READING(hdr))); } hdr->b_flags &= ~ARC_FLAG_L2_EVICTED; if (l2arc_noprefetch && HDR_PREFETCH(hdr)) hdr->b_flags &= ~ARC_FLAG_L2CACHE; /* byteswap if necessary */ callback_list = hdr->b_l1hdr.b_acb; ASSERT(callback_list != NULL); if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) { dmu_object_byteswap_t bswap = DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp)); arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ? byteswap_uint64_array : dmu_ot_byteswap[bswap].ob_func; func(buf->b_data, hdr->b_size); } arc_cksum_compute(buf, B_FALSE); #ifdef illumos arc_buf_watch(buf); #endif if (hash_lock && zio->io_error == 0 && hdr->b_l1hdr.b_state == arc_anon) { /* * Only call arc_access on anonymous buffers. This is because * if we've issued an I/O for an evicted buffer, we've already * called arc_access (to prevent any simultaneous readers from * getting confused). */ arc_access(hdr, hash_lock); } /* create copies of the data buffer for the callers */ abuf = buf; for (acb = callback_list; acb; acb = acb->acb_next) { if (acb->acb_done) { if (abuf == NULL) { ARCSTAT_BUMP(arcstat_duplicate_reads); abuf = arc_buf_clone(buf); } acb->acb_buf = abuf; abuf = NULL; } } hdr->b_l1hdr.b_acb = NULL; hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS; ASSERT(!HDR_BUF_AVAILABLE(hdr)); if (abuf == buf) { ASSERT(buf->b_efunc == NULL); ASSERT(hdr->b_l1hdr.b_datacnt == 1); hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE; } ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) || callback_list != NULL); if (zio->io_error != 0) { hdr->b_flags |= ARC_FLAG_IO_ERROR; if (hdr->b_l1hdr.b_state != arc_anon) arc_change_state(arc_anon, hdr, hash_lock); if (HDR_IN_HASH_TABLE(hdr)) buf_hash_remove(hdr); freeable = refcount_is_zero(&hdr->b_l1hdr.b_refcnt); } /* * Broadcast before we drop the hash_lock to avoid the possibility * that the hdr (and hence the cv) might be freed before we get to * the cv_broadcast(). */ cv_broadcast(&hdr->b_l1hdr.b_cv); if (hash_lock != NULL) { mutex_exit(hash_lock); } else { /* * This block was freed while we waited for the read to * complete. It has been removed from the hash table and * moved to the anonymous state (so that it won't show up * in the cache). */ ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon); freeable = refcount_is_zero(&hdr->b_l1hdr.b_refcnt); } /* execute each callback and free its structure */ while ((acb = callback_list) != NULL) { if (acb->acb_done) acb->acb_done(zio, acb->acb_buf, acb->acb_private); if (acb->acb_zio_dummy != NULL) { acb->acb_zio_dummy->io_error = zio->io_error; zio_nowait(acb->acb_zio_dummy); } callback_list = acb->acb_next; kmem_free(acb, sizeof (arc_callback_t)); } if (freeable) arc_hdr_destroy(hdr); } /* * "Read" the block at the specified DVA (in bp) via the * cache. If the block is found in the cache, invoke the provided * callback immediately and return. Note that the `zio' parameter * in the callback will be NULL in this case, since no IO was * required. If the block is not in the cache pass the read request * on to the spa with a substitute callback function, so that the * requested block will be added to the cache. * * If a read request arrives for a block that has a read in-progress, * either wait for the in-progress read to complete (and return the * results); or, if this is a read with a "done" func, add a record * to the read to invoke the "done" func when the read completes, * and return; or just return. * * arc_read_done() will invoke all the requested "done" functions * for readers of this block. */ int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done, void *private, zio_priority_t priority, int zio_flags, arc_flags_t *arc_flags, const zbookmark_phys_t *zb) { arc_buf_hdr_t *hdr = NULL; arc_buf_t *buf = NULL; kmutex_t *hash_lock = NULL; zio_t *rzio; uint64_t guid = spa_load_guid(spa); ASSERT(!BP_IS_EMBEDDED(bp) || BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA); top: if (!BP_IS_EMBEDDED(bp)) { /* * Embedded BP's have no DVA and require no I/O to "read". * Create an anonymous arc buf to back it. */ hdr = buf_hash_find(guid, bp, &hash_lock); } if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_datacnt > 0) { *arc_flags |= ARC_FLAG_CACHED; if (HDR_IO_IN_PROGRESS(hdr)) { if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) && priority == ZIO_PRIORITY_SYNC_READ) { /* * This sync read must wait for an * in-progress async read (e.g. a predictive * prefetch). Async reads are queued * separately at the vdev_queue layer, so * this is a form of priority inversion. * Ideally, we would "inherit" the demand * i/o's priority by moving the i/o from * the async queue to the synchronous queue, * but there is currently no mechanism to do * so. Track this so that we can evaluate * the magnitude of this potential performance * problem. * * Note that if the prefetch i/o is already * active (has been issued to the device), * the prefetch improved performance, because * we issued it sooner than we would have * without the prefetch. */ DTRACE_PROBE1(arc__sync__wait__for__async, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP(arcstat_sync_wait_for_async); } if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) { hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH; } if (*arc_flags & ARC_FLAG_WAIT) { cv_wait(&hdr->b_l1hdr.b_cv, hash_lock); mutex_exit(hash_lock); goto top; } ASSERT(*arc_flags & ARC_FLAG_NOWAIT); if (done) { arc_callback_t *acb = NULL; acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP); acb->acb_done = done; acb->acb_private = private; if (pio != NULL) acb->acb_zio_dummy = zio_null(pio, spa, NULL, NULL, NULL, zio_flags); ASSERT(acb->acb_done != NULL); acb->acb_next = hdr->b_l1hdr.b_acb; hdr->b_l1hdr.b_acb = acb; add_reference(hdr, hash_lock, private); mutex_exit(hash_lock); return (0); } mutex_exit(hash_lock); return (0); } ASSERT(hdr->b_l1hdr.b_state == arc_mru || hdr->b_l1hdr.b_state == arc_mfu); if (done) { if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) { /* * This is a demand read which does not have to * wait for i/o because we did a predictive * prefetch i/o for it, which has completed. */ DTRACE_PROBE1( arc__demand__hit__predictive__prefetch, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP( arcstat_demand_hit_predictive_prefetch); hdr->b_flags &= ~ARC_FLAG_PREDICTIVE_PREFETCH; } add_reference(hdr, hash_lock, private); /* * If this block is already in use, create a new * copy of the data so that we will be guaranteed * that arc_release() will always succeed. */ buf = hdr->b_l1hdr.b_buf; ASSERT(buf); ASSERT(buf->b_data); if (HDR_BUF_AVAILABLE(hdr)) { ASSERT(buf->b_efunc == NULL); hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE; } else { buf = arc_buf_clone(buf); } } else if (*arc_flags & ARC_FLAG_PREFETCH && refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) { hdr->b_flags |= ARC_FLAG_PREFETCH; } DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr); arc_access(hdr, hash_lock); if (*arc_flags & ARC_FLAG_L2CACHE) hdr->b_flags |= ARC_FLAG_L2CACHE; if (*arc_flags & ARC_FLAG_L2COMPRESS) hdr->b_flags |= ARC_FLAG_L2COMPRESS; mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_hits); ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr), demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, hits); if (done) done(NULL, buf, private); } else { uint64_t size = BP_GET_LSIZE(bp); arc_callback_t *acb; vdev_t *vd = NULL; uint64_t addr = 0; boolean_t devw = B_FALSE; enum zio_compress b_compress = ZIO_COMPRESS_OFF; int32_t b_asize = 0; if (hdr == NULL) { /* this block is not in the cache */ arc_buf_hdr_t *exists = NULL; arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp); buf = arc_buf_alloc(spa, size, private, type); hdr = buf->b_hdr; if (!BP_IS_EMBEDDED(bp)) { hdr->b_dva = *BP_IDENTITY(bp); hdr->b_birth = BP_PHYSICAL_BIRTH(bp); exists = buf_hash_insert(hdr, &hash_lock); } if (exists != NULL) { /* somebody beat us to the hash insert */ mutex_exit(hash_lock); buf_discard_identity(hdr); (void) arc_buf_remove_ref(buf, private); goto top; /* restart the IO request */ } /* * If there is a callback, we pass our reference to * it; otherwise we remove our reference. */ if (done == NULL) { (void) remove_reference(hdr, hash_lock, private); } if (*arc_flags & ARC_FLAG_PREFETCH) hdr->b_flags |= ARC_FLAG_PREFETCH; if (*arc_flags & ARC_FLAG_L2CACHE) hdr->b_flags |= ARC_FLAG_L2CACHE; if (*arc_flags & ARC_FLAG_L2COMPRESS) hdr->b_flags |= ARC_FLAG_L2COMPRESS; if (BP_GET_LEVEL(bp) > 0) hdr->b_flags |= ARC_FLAG_INDIRECT; } else { /* * This block is in the ghost cache. If it was L2-only * (and thus didn't have an L1 hdr), we realloc the * header to add an L1 hdr. */ if (!HDR_HAS_L1HDR(hdr)) { hdr = arc_hdr_realloc(hdr, hdr_l2only_cache, hdr_full_cache); } ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state)); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL); /* * If there is a callback, we pass a reference to it. */ if (done != NULL) add_reference(hdr, hash_lock, private); if (*arc_flags & ARC_FLAG_PREFETCH) hdr->b_flags |= ARC_FLAG_PREFETCH; if (*arc_flags & ARC_FLAG_L2CACHE) hdr->b_flags |= ARC_FLAG_L2CACHE; if (*arc_flags & ARC_FLAG_L2COMPRESS) hdr->b_flags |= ARC_FLAG_L2COMPRESS; buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE); buf->b_hdr = hdr; buf->b_data = NULL; buf->b_efunc = NULL; buf->b_private = NULL; buf->b_next = NULL; hdr->b_l1hdr.b_buf = buf; ASSERT0(hdr->b_l1hdr.b_datacnt); hdr->b_l1hdr.b_datacnt = 1; arc_get_data_buf(buf); arc_access(hdr, hash_lock); } if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH) hdr->b_flags |= ARC_FLAG_PREDICTIVE_PREFETCH; ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state)); acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP); acb->acb_done = done; acb->acb_private = private; ASSERT(hdr->b_l1hdr.b_acb == NULL); hdr->b_l1hdr.b_acb = acb; hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS; if (HDR_HAS_L2HDR(hdr) && (vd = hdr->b_l2hdr.b_dev->l2ad_vdev) != NULL) { devw = hdr->b_l2hdr.b_dev->l2ad_writing; addr = hdr->b_l2hdr.b_daddr; b_compress = hdr->b_l2hdr.b_compress; b_asize = hdr->b_l2hdr.b_asize; /* * Lock out device removal. */ if (vdev_is_dead(vd) || !spa_config_tryenter(spa, SCL_L2ARC, vd, RW_READER)) vd = NULL; } if (hash_lock != NULL) mutex_exit(hash_lock); /* * At this point, we have a level 1 cache miss. Try again in * L2ARC if possible. */ ASSERT3U(hdr->b_size, ==, size); DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp, uint64_t, size, zbookmark_phys_t *, zb); ARCSTAT_BUMP(arcstat_misses); ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr), demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, misses); #ifdef _KERNEL #ifdef RACCT if (racct_enable) { PROC_LOCK(curproc); racct_add_force(curproc, RACCT_READBPS, size); racct_add_force(curproc, RACCT_READIOPS, 1); PROC_UNLOCK(curproc); } #endif /* RACCT */ curthread->td_ru.ru_inblock++; #endif if (priority == ZIO_PRIORITY_ASYNC_READ) hdr->b_flags |= ARC_FLAG_PRIO_ASYNC_READ; else hdr->b_flags &= ~ARC_FLAG_PRIO_ASYNC_READ; if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) { /* * Read from the L2ARC if the following are true: * 1. The L2ARC vdev was previously cached. * 2. This buffer still has L2ARC metadata. * 3. This buffer isn't currently writing to the L2ARC. * 4. The L2ARC entry wasn't evicted, which may * also have invalidated the vdev. * 5. This isn't prefetch and l2arc_noprefetch is set. */ if (HDR_HAS_L2HDR(hdr) && !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) && !(l2arc_noprefetch && HDR_PREFETCH(hdr))) { l2arc_read_callback_t *cb; void* b_data; DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP(arcstat_l2_hits); cb = kmem_zalloc(sizeof (l2arc_read_callback_t), KM_SLEEP); cb->l2rcb_buf = buf; cb->l2rcb_spa = spa; cb->l2rcb_bp = *bp; cb->l2rcb_zb = *zb; cb->l2rcb_flags = zio_flags; cb->l2rcb_compress = b_compress; if (b_asize > hdr->b_size) { ASSERT3U(b_compress, ==, ZIO_COMPRESS_OFF); b_data = zio_data_buf_alloc(b_asize); cb->l2rcb_data = b_data; } else { b_data = buf->b_data; } ASSERT(addr >= VDEV_LABEL_START_SIZE && addr + size < vd->vdev_psize - VDEV_LABEL_END_SIZE); /* * l2arc read. The SCL_L2ARC lock will be * released by l2arc_read_done(). * Issue a null zio if the underlying buffer * was squashed to zero size by compression. */ if (b_compress == ZIO_COMPRESS_EMPTY) { ASSERT3U(b_asize, ==, 0); rzio = zio_null(pio, spa, vd, l2arc_read_done, cb, zio_flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY); } else { rzio = zio_read_phys(pio, vd, addr, b_asize, b_data, ZIO_CHECKSUM_OFF, l2arc_read_done, cb, priority, zio_flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY, B_FALSE); } DTRACE_PROBE2(l2arc__read, vdev_t *, vd, zio_t *, rzio); ARCSTAT_INCR(arcstat_l2_read_bytes, b_asize); if (*arc_flags & ARC_FLAG_NOWAIT) { zio_nowait(rzio); return (0); } ASSERT(*arc_flags & ARC_FLAG_WAIT); if (zio_wait(rzio) == 0) return (0); /* l2arc read error; goto zio_read() */ } else { DTRACE_PROBE1(l2arc__miss, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP(arcstat_l2_misses); if (HDR_L2_WRITING(hdr)) ARCSTAT_BUMP(arcstat_l2_rw_clash); spa_config_exit(spa, SCL_L2ARC, vd); } } else { if (vd != NULL) spa_config_exit(spa, SCL_L2ARC, vd); if (l2arc_ndev != 0) { DTRACE_PROBE1(l2arc__miss, arc_buf_hdr_t *, hdr); ARCSTAT_BUMP(arcstat_l2_misses); } } rzio = zio_read(pio, spa, bp, buf->b_data, size, arc_read_done, buf, priority, zio_flags, zb); if (*arc_flags & ARC_FLAG_WAIT) return (zio_wait(rzio)); ASSERT(*arc_flags & ARC_FLAG_NOWAIT); zio_nowait(rzio); } return (0); } void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private) { ASSERT(buf->b_hdr != NULL); ASSERT(buf->b_hdr->b_l1hdr.b_state != arc_anon); ASSERT(!refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt) || func == NULL); ASSERT(buf->b_efunc == NULL); ASSERT(!HDR_BUF_AVAILABLE(buf->b_hdr)); buf->b_efunc = func; buf->b_private = private; } /* * Notify the arc that a block was freed, and thus will never be used again. */ void arc_freed(spa_t *spa, const blkptr_t *bp) { arc_buf_hdr_t *hdr; kmutex_t *hash_lock; uint64_t guid = spa_load_guid(spa); ASSERT(!BP_IS_EMBEDDED(bp)); hdr = buf_hash_find(guid, bp, &hash_lock); if (hdr == NULL) return; if (HDR_BUF_AVAILABLE(hdr)) { arc_buf_t *buf = hdr->b_l1hdr.b_buf; add_reference(hdr, hash_lock, FTAG); hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE; mutex_exit(hash_lock); arc_release(buf, FTAG); (void) arc_buf_remove_ref(buf, FTAG); } else { mutex_exit(hash_lock); } } /* * Clear the user eviction callback set by arc_set_callback(), first calling * it if it exists. Because the presence of a callback keeps an arc_buf cached * clearing the callback may result in the arc_buf being destroyed. However, * it will not result in the *last* arc_buf being destroyed, hence the data * will remain cached in the ARC. We make a copy of the arc buffer here so * that we can process the callback without holding any locks. * * It's possible that the callback is already in the process of being cleared * by another thread. In this case we can not clear the callback. * * Returns B_TRUE if the callback was successfully called and cleared. */ boolean_t arc_clear_callback(arc_buf_t *buf) { arc_buf_hdr_t *hdr; kmutex_t *hash_lock; arc_evict_func_t *efunc = buf->b_efunc; void *private = buf->b_private; mutex_enter(&buf->b_evict_lock); hdr = buf->b_hdr; if (hdr == NULL) { /* * We are in arc_do_user_evicts(). */ ASSERT(buf->b_data == NULL); mutex_exit(&buf->b_evict_lock); return (B_FALSE); } else if (buf->b_data == NULL) { /* * We are on the eviction list; process this buffer now * but let arc_do_user_evicts() do the reaping. */ buf->b_efunc = NULL; mutex_exit(&buf->b_evict_lock); VERIFY0(efunc(private)); return (B_TRUE); } hash_lock = HDR_LOCK(hdr); mutex_enter(hash_lock); hdr = buf->b_hdr; ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); ASSERT3U(refcount_count(&hdr->b_l1hdr.b_refcnt), <, hdr->b_l1hdr.b_datacnt); ASSERT(hdr->b_l1hdr.b_state == arc_mru || hdr->b_l1hdr.b_state == arc_mfu); buf->b_efunc = NULL; buf->b_private = NULL; if (hdr->b_l1hdr.b_datacnt > 1) { mutex_exit(&buf->b_evict_lock); arc_buf_destroy(buf, TRUE); } else { ASSERT(buf == hdr->b_l1hdr.b_buf); hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE; mutex_exit(&buf->b_evict_lock); } mutex_exit(hash_lock); VERIFY0(efunc(private)); return (B_TRUE); } /* * Release this buffer from the cache, making it an anonymous buffer. This * must be done after a read and prior to modifying the buffer contents. * If the buffer has more than one reference, we must make * a new hdr for the buffer. */ void arc_release(arc_buf_t *buf, void *tag) { arc_buf_hdr_t *hdr = buf->b_hdr; /* * It would be nice to assert that if it's DMU metadata (level > * 0 || it's the dnode file), then it must be syncing context. * But we don't know that information at this level. */ mutex_enter(&buf->b_evict_lock); ASSERT(HDR_HAS_L1HDR(hdr)); /* * We don't grab the hash lock prior to this check, because if * the buffer's header is in the arc_anon state, it won't be * linked into the hash table. */ if (hdr->b_l1hdr.b_state == arc_anon) { mutex_exit(&buf->b_evict_lock); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT(!HDR_IN_HASH_TABLE(hdr)); ASSERT(!HDR_HAS_L2HDR(hdr)); ASSERT(BUF_EMPTY(hdr)); ASSERT3U(hdr->b_l1hdr.b_datacnt, ==, 1); ASSERT3S(refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1); ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node)); ASSERT3P(buf->b_efunc, ==, NULL); ASSERT3P(buf->b_private, ==, NULL); hdr->b_l1hdr.b_arc_access = 0; arc_buf_thaw(buf); return; } kmutex_t *hash_lock = HDR_LOCK(hdr); mutex_enter(hash_lock); /* * This assignment is only valid as long as the hash_lock is * held, we must be careful not to reference state or the * b_state field after dropping the lock. */ arc_state_t *state = hdr->b_l1hdr.b_state; ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); ASSERT3P(state, !=, arc_anon); /* this buffer is not on any list */ ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) > 0); if (HDR_HAS_L2HDR(hdr)) { mutex_enter(&hdr->b_l2hdr.b_dev->l2ad_mtx); /* * We have to recheck this conditional again now that * we're holding the l2ad_mtx to prevent a race with * another thread which might be concurrently calling * l2arc_evict(). In that case, l2arc_evict() might have * destroyed the header's L2 portion as we were waiting * to acquire the l2ad_mtx. */ if (HDR_HAS_L2HDR(hdr)) { l2arc_trim(hdr); arc_hdr_l2hdr_destroy(hdr); } mutex_exit(&hdr->b_l2hdr.b_dev->l2ad_mtx); } /* * Do we have more than one buf? */ if (hdr->b_l1hdr.b_datacnt > 1) { arc_buf_hdr_t *nhdr; arc_buf_t **bufp; uint64_t blksz = hdr->b_size; uint64_t spa = hdr->b_spa; arc_buf_contents_t type = arc_buf_type(hdr); uint32_t flags = hdr->b_flags; ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL); /* * Pull the data off of this hdr and attach it to * a new anonymous hdr. */ (void) remove_reference(hdr, hash_lock, tag); bufp = &hdr->b_l1hdr.b_buf; while (*bufp != buf) bufp = &(*bufp)->b_next; *bufp = buf->b_next; buf->b_next = NULL; ASSERT3P(state, !=, arc_l2c_only); (void) refcount_remove_many( &state->arcs_size, hdr->b_size, buf); if (refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) { ASSERT3P(state, !=, arc_l2c_only); uint64_t *size = &state->arcs_lsize[type]; ASSERT3U(*size, >=, hdr->b_size); atomic_add_64(size, -hdr->b_size); } /* * We're releasing a duplicate user data buffer, update * our statistics accordingly. */ if (HDR_ISTYPE_DATA(hdr)) { ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers); ARCSTAT_INCR(arcstat_duplicate_buffers_size, -hdr->b_size); } hdr->b_l1hdr.b_datacnt -= 1; arc_cksum_verify(buf); #ifdef illumos arc_buf_unwatch(buf); #endif mutex_exit(hash_lock); nhdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE); nhdr->b_size = blksz; nhdr->b_spa = spa; nhdr->b_flags = flags & ARC_FLAG_L2_WRITING; nhdr->b_flags |= arc_bufc_to_flags(type); nhdr->b_flags |= ARC_FLAG_HAS_L1HDR; nhdr->b_l1hdr.b_buf = buf; nhdr->b_l1hdr.b_datacnt = 1; nhdr->b_l1hdr.b_state = arc_anon; nhdr->b_l1hdr.b_arc_access = 0; nhdr->b_l1hdr.b_tmp_cdata = NULL; nhdr->b_freeze_cksum = NULL; (void) refcount_add(&nhdr->b_l1hdr.b_refcnt, tag); buf->b_hdr = nhdr; mutex_exit(&buf->b_evict_lock); (void) refcount_add_many(&arc_anon->arcs_size, blksz, buf); } else { mutex_exit(&buf->b_evict_lock); ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) == 1); /* protected by hash lock, or hdr is on arc_anon */ ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node)); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); arc_change_state(arc_anon, hdr, hash_lock); hdr->b_l1hdr.b_arc_access = 0; mutex_exit(hash_lock); buf_discard_identity(hdr); arc_buf_thaw(buf); } buf->b_efunc = NULL; buf->b_private = NULL; } int arc_released(arc_buf_t *buf) { int released; mutex_enter(&buf->b_evict_lock); released = (buf->b_data != NULL && buf->b_hdr->b_l1hdr.b_state == arc_anon); mutex_exit(&buf->b_evict_lock); return (released); } #ifdef ZFS_DEBUG int arc_referenced(arc_buf_t *buf) { int referenced; mutex_enter(&buf->b_evict_lock); referenced = (refcount_count(&buf->b_hdr->b_l1hdr.b_refcnt)); mutex_exit(&buf->b_evict_lock); return (referenced); } #endif static void arc_write_ready(zio_t *zio) { arc_write_callback_t *callback = zio->io_private; arc_buf_t *buf = callback->awcb_buf; arc_buf_hdr_t *hdr = buf->b_hdr; ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(!refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt)); ASSERT(hdr->b_l1hdr.b_datacnt > 0); callback->awcb_ready(zio, buf, callback->awcb_private); /* * If the IO is already in progress, then this is a re-write * attempt, so we need to thaw and re-compute the cksum. * It is the responsibility of the callback to handle the * accounting for any re-write attempt. */ if (HDR_IO_IN_PROGRESS(hdr)) { mutex_enter(&hdr->b_l1hdr.b_freeze_lock); if (hdr->b_freeze_cksum != NULL) { kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t)); hdr->b_freeze_cksum = NULL; } mutex_exit(&hdr->b_l1hdr.b_freeze_lock); } arc_cksum_compute(buf, B_FALSE); hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS; } /* * The SPA calls this callback for each physical write that happens on behalf * of a logical write. See the comment in dbuf_write_physdone() for details. */ static void arc_write_physdone(zio_t *zio) { arc_write_callback_t *cb = zio->io_private; if (cb->awcb_physdone != NULL) cb->awcb_physdone(zio, cb->awcb_buf, cb->awcb_private); } static void arc_write_done(zio_t *zio) { arc_write_callback_t *callback = zio->io_private; arc_buf_t *buf = callback->awcb_buf; arc_buf_hdr_t *hdr = buf->b_hdr; ASSERT(hdr->b_l1hdr.b_acb == NULL); if (zio->io_error == 0) { if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) { buf_discard_identity(hdr); } else { hdr->b_dva = *BP_IDENTITY(zio->io_bp); hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp); } } else { ASSERT(BUF_EMPTY(hdr)); } /* * If the block to be written was all-zero or compressed enough to be * embedded in the BP, no write was performed so there will be no * dva/birth/checksum. The buffer must therefore remain anonymous * (and uncached). */ if (!BUF_EMPTY(hdr)) { arc_buf_hdr_t *exists; kmutex_t *hash_lock; ASSERT(zio->io_error == 0); arc_cksum_verify(buf); exists = buf_hash_insert(hdr, &hash_lock); if (exists != NULL) { /* * This can only happen if we overwrite for * sync-to-convergence, because we remove * buffers from the hash table when we arc_free(). */ if (zio->io_flags & ZIO_FLAG_IO_REWRITE) { if (!BP_EQUAL(&zio->io_bp_orig, zio->io_bp)) panic("bad overwrite, hdr=%p exists=%p", (void *)hdr, (void *)exists); ASSERT(refcount_is_zero( &exists->b_l1hdr.b_refcnt)); arc_change_state(arc_anon, exists, hash_lock); mutex_exit(hash_lock); arc_hdr_destroy(exists); exists = buf_hash_insert(hdr, &hash_lock); ASSERT3P(exists, ==, NULL); } else if (zio->io_flags & ZIO_FLAG_NOPWRITE) { /* nopwrite */ ASSERT(zio->io_prop.zp_nopwrite); if (!BP_EQUAL(&zio->io_bp_orig, zio->io_bp)) panic("bad nopwrite, hdr=%p exists=%p", (void *)hdr, (void *)exists); } else { /* Dedup */ ASSERT(hdr->b_l1hdr.b_datacnt == 1); ASSERT(hdr->b_l1hdr.b_state == arc_anon); ASSERT(BP_GET_DEDUP(zio->io_bp)); ASSERT(BP_GET_LEVEL(zio->io_bp) == 0); } } hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS; /* if it's not anon, we are doing a scrub */ if (exists == NULL && hdr->b_l1hdr.b_state == arc_anon) arc_access(hdr, hash_lock); mutex_exit(hash_lock); } else { hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS; } ASSERT(!refcount_is_zero(&hdr->b_l1hdr.b_refcnt)); callback->awcb_done(zio, buf, callback->awcb_private); kmem_free(callback, sizeof (arc_write_callback_t)); } zio_t * arc_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress, const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone, arc_done_func_t *done, void *private, zio_priority_t priority, int zio_flags, const zbookmark_phys_t *zb) { arc_buf_hdr_t *hdr = buf->b_hdr; arc_write_callback_t *callback; zio_t *zio; ASSERT(ready != NULL); ASSERT(done != NULL); ASSERT(!HDR_IO_ERROR(hdr)); ASSERT(!HDR_IO_IN_PROGRESS(hdr)); ASSERT(hdr->b_l1hdr.b_acb == NULL); ASSERT(hdr->b_l1hdr.b_datacnt > 0); if (l2arc) hdr->b_flags |= ARC_FLAG_L2CACHE; if (l2arc_compress) hdr->b_flags |= ARC_FLAG_L2COMPRESS; callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP); callback->awcb_ready = ready; callback->awcb_physdone = physdone; callback->awcb_done = done; callback->awcb_private = private; callback->awcb_buf = buf; zio = zio_write(pio, spa, txg, bp, buf->b_data, hdr->b_size, zp, arc_write_ready, arc_write_physdone, arc_write_done, callback, priority, zio_flags, zb); return (zio); } static int arc_memory_throttle(uint64_t reserve, uint64_t txg) { #ifdef _KERNEL uint64_t available_memory = ptob(freemem); static uint64_t page_load = 0; static uint64_t last_txg = 0; #if defined(__i386) || !defined(UMA_MD_SMALL_ALLOC) available_memory = MIN(available_memory, ptob(vmem_size(heap_arena, VMEM_FREE))); #endif if (freemem > (uint64_t)physmem * arc_lotsfree_percent / 100) return (0); if (txg > last_txg) { last_txg = txg; page_load = 0; } /* * If we are in pageout, we know that memory is already tight, * the arc is already going to be evicting, so we just want to * continue to let page writes occur as quickly as possible. */ if (curproc == pageproc) { if (page_load > MAX(ptob(minfree), available_memory) / 4) return (SET_ERROR(ERESTART)); /* Note: reserve is inflated, so we deflate */ page_load += reserve / 8; return (0); } else if (page_load > 0 && arc_reclaim_needed()) { /* memory is low, delay before restarting */ ARCSTAT_INCR(arcstat_memory_throttle_count, 1); return (SET_ERROR(EAGAIN)); } page_load = 0; #endif return (0); } void arc_tempreserve_clear(uint64_t reserve) { atomic_add_64(&arc_tempreserve, -reserve); ASSERT((int64_t)arc_tempreserve >= 0); } int arc_tempreserve_space(uint64_t reserve, uint64_t txg) { int error; uint64_t anon_size; if (reserve > arc_c/4 && !arc_no_grow) { arc_c = MIN(arc_c_max, reserve * 4); DTRACE_PROBE1(arc__set_reserve, uint64_t, arc_c); } if (reserve > arc_c) return (SET_ERROR(ENOMEM)); /* * Don't count loaned bufs as in flight dirty data to prevent long * network delays from blocking transactions that are ready to be * assigned to a txg. */ anon_size = MAX((int64_t)(refcount_count(&arc_anon->arcs_size) - arc_loaned_bytes), 0); /* * Writes will, almost always, require additional memory allocations * in order to compress/encrypt/etc the data. We therefore need to * make sure that there is sufficient available memory for this. */ error = arc_memory_throttle(reserve, txg); if (error != 0) return (error); /* * Throttle writes when the amount of dirty data in the cache * gets too large. We try to keep the cache less than half full * of dirty blocks so that our sync times don't grow too large. * Note: if two requests come in concurrently, we might let them * both succeed, when one of them should fail. Not a huge deal. */ if (reserve + arc_tempreserve + anon_size > arc_c / 2 && anon_size > arc_c / 4) { dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK " "anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n", arc_tempreserve>>10, arc_anon->arcs_lsize[ARC_BUFC_METADATA]>>10, arc_anon->arcs_lsize[ARC_BUFC_DATA]>>10, reserve>>10, arc_c>>10); return (SET_ERROR(ERESTART)); } atomic_add_64(&arc_tempreserve, reserve); return (0); } static void arc_kstat_update_state(arc_state_t *state, kstat_named_t *size, kstat_named_t *evict_data, kstat_named_t *evict_metadata) { size->value.ui64 = refcount_count(&state->arcs_size); evict_data->value.ui64 = state->arcs_lsize[ARC_BUFC_DATA]; evict_metadata->value.ui64 = state->arcs_lsize[ARC_BUFC_METADATA]; } static int arc_kstat_update(kstat_t *ksp, int rw) { arc_stats_t *as = ksp->ks_data; if (rw == KSTAT_WRITE) { return (EACCES); } else { arc_kstat_update_state(arc_anon, &as->arcstat_anon_size, &as->arcstat_anon_evictable_data, &as->arcstat_anon_evictable_metadata); arc_kstat_update_state(arc_mru, &as->arcstat_mru_size, &as->arcstat_mru_evictable_data, &as->arcstat_mru_evictable_metadata); arc_kstat_update_state(arc_mru_ghost, &as->arcstat_mru_ghost_size, &as->arcstat_mru_ghost_evictable_data, &as->arcstat_mru_ghost_evictable_metadata); arc_kstat_update_state(arc_mfu, &as->arcstat_mfu_size, &as->arcstat_mfu_evictable_data, &as->arcstat_mfu_evictable_metadata); arc_kstat_update_state(arc_mfu_ghost, &as->arcstat_mfu_ghost_size, &as->arcstat_mfu_ghost_evictable_data, &as->arcstat_mfu_ghost_evictable_metadata); } return (0); } /* * This function *must* return indices evenly distributed between all * sublists of the multilist. This is needed due to how the ARC eviction * code is laid out; arc_evict_state() assumes ARC buffers are evenly * distributed between all sublists and uses this assumption when * deciding which sublist to evict from and how much to evict from it. */ unsigned int arc_state_multilist_index_func(multilist_t *ml, void *obj) { arc_buf_hdr_t *hdr = obj; /* * We rely on b_dva to generate evenly distributed index * numbers using buf_hash below. So, as an added precaution, * let's make sure we never add empty buffers to the arc lists. */ ASSERT(!BUF_EMPTY(hdr)); /* * The assumption here, is the hash value for a given * arc_buf_hdr_t will remain constant throughout it's lifetime * (i.e. it's b_spa, b_dva, and b_birth fields don't change). * Thus, we don't need to store the header's sublist index * on insertion, as this index can be recalculated on removal. * * Also, the low order bits of the hash value are thought to be * distributed evenly. Otherwise, in the case that the multilist * has a power of two number of sublists, each sublists' usage * would not be evenly distributed. */ return (buf_hash(hdr->b_spa, &hdr->b_dva, hdr->b_birth) % multilist_get_num_sublists(ml)); } #ifdef _KERNEL static eventhandler_tag arc_event_lowmem = NULL; static void arc_lowmem(void *arg __unused, int howto __unused) { mutex_enter(&arc_reclaim_lock); /* XXX: Memory deficit should be passed as argument. */ needfree = btoc(arc_c >> arc_shrink_shift); DTRACE_PROBE(arc__needfree); cv_signal(&arc_reclaim_thread_cv); /* * It is unsafe to block here in arbitrary threads, because we can come * here from ARC itself and may hold ARC locks and thus risk a deadlock * with ARC reclaim thread. */ if (curproc == pageproc) (void) cv_wait(&arc_reclaim_waiters_cv, &arc_reclaim_lock); mutex_exit(&arc_reclaim_lock); } #endif void arc_init(void) { int i, prefetch_tunable_set = 0; mutex_init(&arc_reclaim_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&arc_reclaim_thread_cv, NULL, CV_DEFAULT, NULL); cv_init(&arc_reclaim_waiters_cv, NULL, CV_DEFAULT, NULL); mutex_init(&arc_user_evicts_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&arc_user_evicts_cv, NULL, CV_DEFAULT, NULL); mutex_init(&arc_dnlc_evicts_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&arc_dnlc_evicts_cv, NULL, CV_DEFAULT, NULL); /* Convert seconds to clock ticks */ arc_min_prefetch_lifespan = 1 * hz; /* Start out with 1/8 of all memory */ arc_c = kmem_size() / 8; #ifdef illumos #ifdef _KERNEL /* * On architectures where the physical memory can be larger * than the addressable space (intel in 32-bit mode), we may * need to limit the cache to 1/8 of VM size. */ arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8); #endif #endif /* illumos */ - /* set min cache to 1/32 of all memory, or 16MB, whichever is more */ - arc_c_min = MAX(arc_c / 4, 16 << 20); + /* set min cache to 1/32 of all memory, or arc_abs_min, whichever is more */ + arc_c_min = MAX(arc_c / 4, arc_abs_min); /* set max to 1/2 of all memory, or all but 1GB, whichever is more */ if (arc_c * 8 >= 1 << 30) arc_c_max = (arc_c * 8) - (1 << 30); else arc_c_max = arc_c_min; arc_c_max = MAX(arc_c * 5, arc_c_max); /* * In userland, there's only the memory pressure that we artificially * create (see arc_available_memory()). Don't let arc_c get too * small, because it can cause transactions to be larger than * arc_c, causing arc_tempreserve_space() to fail. */ #ifndef _KERNEL arc_c_min = arc_c_max / 2; #endif #ifdef _KERNEL /* * Allow the tunables to override our calculations if they are - * reasonable (ie. over 16MB) + * reasonable. */ - if (zfs_arc_max > 16 << 20 && zfs_arc_max < kmem_size()) + if (zfs_arc_max > arc_abs_min && zfs_arc_max < kmem_size()) arc_c_max = zfs_arc_max; - if (zfs_arc_min > 16 << 20 && zfs_arc_min <= arc_c_max) + if (zfs_arc_min > arc_abs_min && zfs_arc_min <= arc_c_max) arc_c_min = zfs_arc_min; #endif arc_c = arc_c_max; arc_p = (arc_c >> 1); /* limit meta-data to 1/4 of the arc capacity */ arc_meta_limit = arc_c_max / 4; /* Allow the tunable to override if it is reasonable */ if (zfs_arc_meta_limit > 0 && zfs_arc_meta_limit <= arc_c_max) arc_meta_limit = zfs_arc_meta_limit; if (arc_c_min < arc_meta_limit / 2 && zfs_arc_min == 0) arc_c_min = arc_meta_limit / 2; if (zfs_arc_meta_min > 0) { arc_meta_min = zfs_arc_meta_min; } else { arc_meta_min = arc_c_min / 2; } if (zfs_arc_grow_retry > 0) arc_grow_retry = zfs_arc_grow_retry; if (zfs_arc_shrink_shift > 0) arc_shrink_shift = zfs_arc_shrink_shift; /* * Ensure that arc_no_grow_shift is less than arc_shrink_shift. */ if (arc_no_grow_shift >= arc_shrink_shift) arc_no_grow_shift = arc_shrink_shift - 1; if (zfs_arc_p_min_shift > 0) arc_p_min_shift = zfs_arc_p_min_shift; if (zfs_arc_num_sublists_per_state < 1) zfs_arc_num_sublists_per_state = MAX(max_ncpus, 1); /* if kmem_flags are set, lets try to use less memory */ if (kmem_debugging()) arc_c = arc_c / 2; if (arc_c < arc_c_min) arc_c = arc_c_min; zfs_arc_min = arc_c_min; zfs_arc_max = arc_c_max; arc_anon = &ARC_anon; arc_mru = &ARC_mru; arc_mru_ghost = &ARC_mru_ghost; arc_mfu = &ARC_mfu; arc_mfu_ghost = &ARC_mfu_ghost; arc_l2c_only = &ARC_l2c_only; arc_size = 0; multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA], sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node), zfs_arc_num_sublists_per_state, arc_state_multilist_index_func); refcount_create(&arc_anon->arcs_size); refcount_create(&arc_mru->arcs_size); refcount_create(&arc_mru_ghost->arcs_size); refcount_create(&arc_mfu->arcs_size); refcount_create(&arc_mfu_ghost->arcs_size); refcount_create(&arc_l2c_only->arcs_size); buf_init(); arc_reclaim_thread_exit = FALSE; arc_user_evicts_thread_exit = FALSE; arc_dnlc_evicts_thread_exit = FALSE; arc_eviction_list = NULL; bzero(&arc_eviction_hdr, sizeof (arc_buf_hdr_t)); arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED, sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (arc_ksp != NULL) { arc_ksp->ks_data = &arc_stats; arc_ksp->ks_update = arc_kstat_update; kstat_install(arc_ksp); } (void) thread_create(NULL, 0, arc_reclaim_thread, NULL, 0, &p0, TS_RUN, minclsyspri); #ifdef _KERNEL arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL, EVENTHANDLER_PRI_FIRST); #endif (void) thread_create(NULL, 0, arc_user_evicts_thread, NULL, 0, &p0, TS_RUN, minclsyspri); (void) thread_create(NULL, 0, arc_dnlc_evicts_thread, NULL, 0, &p0, TS_RUN, minclsyspri); arc_dead = FALSE; arc_warm = B_FALSE; /* * Calculate maximum amount of dirty data per pool. * * If it has been set by /etc/system, take that. * Otherwise, use a percentage of physical memory defined by * zfs_dirty_data_max_percent (default 10%) with a cap at * zfs_dirty_data_max_max (default 4GB). */ if (zfs_dirty_data_max == 0) { zfs_dirty_data_max = ptob(physmem) * zfs_dirty_data_max_percent / 100; zfs_dirty_data_max = MIN(zfs_dirty_data_max, zfs_dirty_data_max_max); } #ifdef _KERNEL if (TUNABLE_INT_FETCH("vfs.zfs.prefetch_disable", &zfs_prefetch_disable)) prefetch_tunable_set = 1; #ifdef __i386__ if (prefetch_tunable_set == 0) { printf("ZFS NOTICE: Prefetch is disabled by default on i386 " "-- to enable,\n"); printf(" add \"vfs.zfs.prefetch_disable=0\" " "to /boot/loader.conf.\n"); zfs_prefetch_disable = 1; } #else if ((((uint64_t)physmem * PAGESIZE) < (1ULL << 32)) && prefetch_tunable_set == 0) { printf("ZFS NOTICE: Prefetch is disabled by default if less " "than 4GB of RAM is present;\n" " to enable, add \"vfs.zfs.prefetch_disable=0\" " "to /boot/loader.conf.\n"); zfs_prefetch_disable = 1; } #endif /* Warn about ZFS memory and address space requirements. */ if (((uint64_t)physmem * PAGESIZE) < (256 + 128 + 64) * (1 << 20)) { printf("ZFS WARNING: Recommended minimum RAM size is 512MB; " "expect unstable behavior.\n"); } if (kmem_size() < 512 * (1 << 20)) { printf("ZFS WARNING: Recommended minimum kmem_size is 512MB; " "expect unstable behavior.\n"); printf(" Consider tuning vm.kmem_size and " "vm.kmem_size_max\n"); printf(" in /boot/loader.conf.\n"); } #endif } void arc_fini(void) { mutex_enter(&arc_reclaim_lock); arc_reclaim_thread_exit = TRUE; /* * The reclaim thread will set arc_reclaim_thread_exit back to * FALSE when it is finished exiting; we're waiting for that. */ while (arc_reclaim_thread_exit) { cv_signal(&arc_reclaim_thread_cv); cv_wait(&arc_reclaim_thread_cv, &arc_reclaim_lock); } mutex_exit(&arc_reclaim_lock); mutex_enter(&arc_user_evicts_lock); arc_user_evicts_thread_exit = TRUE; /* * The user evicts thread will set arc_user_evicts_thread_exit * to FALSE when it is finished exiting; we're waiting for that. */ while (arc_user_evicts_thread_exit) { cv_signal(&arc_user_evicts_cv); cv_wait(&arc_user_evicts_cv, &arc_user_evicts_lock); } mutex_exit(&arc_user_evicts_lock); mutex_enter(&arc_dnlc_evicts_lock); arc_dnlc_evicts_thread_exit = TRUE; /* * The user evicts thread will set arc_user_evicts_thread_exit * to FALSE when it is finished exiting; we're waiting for that. */ while (arc_dnlc_evicts_thread_exit) { cv_signal(&arc_dnlc_evicts_cv); cv_wait(&arc_dnlc_evicts_cv, &arc_dnlc_evicts_lock); } mutex_exit(&arc_dnlc_evicts_lock); /* Use TRUE to ensure *all* buffers are evicted */ arc_flush(NULL, TRUE); arc_dead = TRUE; if (arc_ksp != NULL) { kstat_delete(arc_ksp); arc_ksp = NULL; } mutex_destroy(&arc_reclaim_lock); cv_destroy(&arc_reclaim_thread_cv); cv_destroy(&arc_reclaim_waiters_cv); mutex_destroy(&arc_user_evicts_lock); cv_destroy(&arc_user_evicts_cv); mutex_destroy(&arc_dnlc_evicts_lock); cv_destroy(&arc_dnlc_evicts_cv); refcount_destroy(&arc_anon->arcs_size); refcount_destroy(&arc_mru->arcs_size); refcount_destroy(&arc_mru_ghost->arcs_size); refcount_destroy(&arc_mfu->arcs_size); refcount_destroy(&arc_mfu_ghost->arcs_size); refcount_destroy(&arc_l2c_only->arcs_size); multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]); multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]); multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]); buf_fini(); ASSERT0(arc_loaned_bytes); #ifdef _KERNEL if (arc_event_lowmem != NULL) EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem); #endif } /* * Level 2 ARC * * The level 2 ARC (L2ARC) is a cache layer in-between main memory and disk. * It uses dedicated storage devices to hold cached data, which are populated * using large infrequent writes. The main role of this cache is to boost * the performance of random read workloads. The intended L2ARC devices * include short-stroked disks, solid state disks, and other media with * substantially faster read latency than disk. * * +-----------------------+ * | ARC | * +-----------------------+ * | ^ ^ * | | | * l2arc_feed_thread() arc_read() * | | | * | l2arc read | * V | | * +---------------+ | * | L2ARC | | * +---------------+ | * | ^ | * l2arc_write() | | * | | | * V | | * +-------+ +-------+ * | vdev | | vdev | * | cache | | cache | * +-------+ +-------+ * +=========+ .-----. * : L2ARC : |-_____-| * : devices : | Disks | * +=========+ `-_____-' * * Read requests are satisfied from the following sources, in order: * * 1) ARC * 2) vdev cache of L2ARC devices * 3) L2ARC devices * 4) vdev cache of disks * 5) disks * * Some L2ARC device types exhibit extremely slow write performance. * To accommodate for this there are some significant differences between * the L2ARC and traditional cache design: * * 1. There is no eviction path from the ARC to the L2ARC. Evictions from * the ARC behave as usual, freeing buffers and placing headers on ghost * lists. The ARC does not send buffers to the L2ARC during eviction as * this would add inflated write latencies for all ARC memory pressure. * * 2. The L2ARC attempts to cache data from the ARC before it is evicted. * It does this by periodically scanning buffers from the eviction-end of * the MFU and MRU ARC lists, copying them to the L2ARC devices if they are * not already there. It scans until a headroom of buffers is satisfied, * which itself is a buffer for ARC eviction. If a compressible buffer is * found during scanning and selected for writing to an L2ARC device, we * temporarily boost scanning headroom during the next scan cycle to make * sure we adapt to compression effects (which might significantly reduce * the data volume we write to L2ARC). The thread that does this is * l2arc_feed_thread(), illustrated below; example sizes are included to * provide a better sense of ratio than this diagram: * * head --> tail * +---------------------+----------+ * ARC_mfu |:::::#:::::::::::::::|o#o###o###|-->. # already on L2ARC * +---------------------+----------+ | o L2ARC eligible * ARC_mru |:#:::::::::::::::::::|#o#ooo####|-->| : ARC buffer * +---------------------+----------+ | * 15.9 Gbytes ^ 32 Mbytes | * headroom | * l2arc_feed_thread() * | * l2arc write hand <--[oooo]--' * | 8 Mbyte * | write max * V * +==============================+ * L2ARC dev |####|#|###|###| |####| ... | * +==============================+ * 32 Gbytes * * 3. If an ARC buffer is copied to the L2ARC but then hit instead of * evicted, then the L2ARC has cached a buffer much sooner than it probably * needed to, potentially wasting L2ARC device bandwidth and storage. It is * safe to say that this is an uncommon case, since buffers at the end of * the ARC lists have moved there due to inactivity. * * 4. If the ARC evicts faster than the L2ARC can maintain a headroom, * then the L2ARC simply misses copying some buffers. This serves as a * pressure valve to prevent heavy read workloads from both stalling the ARC * with waits and clogging the L2ARC with writes. This also helps prevent * the potential for the L2ARC to churn if it attempts to cache content too * quickly, such as during backups of the entire pool. * * 5. After system boot and before the ARC has filled main memory, there are * no evictions from the ARC and so the tails of the ARC_mfu and ARC_mru * lists can remain mostly static. Instead of searching from tail of these * lists as pictured, the l2arc_feed_thread() will search from the list heads * for eligible buffers, greatly increasing its chance of finding them. * * The L2ARC device write speed is also boosted during this time so that * the L2ARC warms up faster. Since there have been no ARC evictions yet, * there are no L2ARC reads, and no fear of degrading read performance * through increased writes. * * 6. Writes to the L2ARC devices are grouped and sent in-sequence, so that * the vdev queue can aggregate them into larger and fewer writes. Each * device is written to in a rotor fashion, sweeping writes through * available space then repeating. * * 7. The L2ARC does not store dirty content. It never needs to flush * write buffers back to disk based storage. * * 8. If an ARC buffer is written (and dirtied) which also exists in the * L2ARC, the now stale L2ARC buffer is immediately dropped. * * The performance of the L2ARC can be tweaked by a number of tunables, which * may be necessary for different workloads: * * l2arc_write_max max write bytes per interval * l2arc_write_boost extra write bytes during device warmup * l2arc_noprefetch skip caching prefetched buffers * l2arc_headroom number of max device writes to precache * l2arc_headroom_boost when we find compressed buffers during ARC * scanning, we multiply headroom by this * percentage factor for the next scan cycle, * since more compressed buffers are likely to * be present * l2arc_feed_secs seconds between L2ARC writing * * Tunables may be removed or added as future performance improvements are * integrated, and also may become zpool properties. * * There are three key functions that control how the L2ARC warms up: * * l2arc_write_eligible() check if a buffer is eligible to cache * l2arc_write_size() calculate how much to write * l2arc_write_interval() calculate sleep delay between writes * * These three functions determine what to write, how much, and how quickly * to send writes. */ static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *hdr) { /* * A buffer is *not* eligible for the L2ARC if it: * 1. belongs to a different spa. * 2. is already cached on the L2ARC. * 3. has an I/O in progress (it may be an incomplete read). * 4. is flagged not eligible (zfs property). */ if (hdr->b_spa != spa_guid) { ARCSTAT_BUMP(arcstat_l2_write_spa_mismatch); return (B_FALSE); } if (HDR_HAS_L2HDR(hdr)) { ARCSTAT_BUMP(arcstat_l2_write_in_l2); return (B_FALSE); } if (HDR_IO_IN_PROGRESS(hdr)) { ARCSTAT_BUMP(arcstat_l2_write_hdr_io_in_progress); return (B_FALSE); } if (!HDR_L2CACHE(hdr)) { ARCSTAT_BUMP(arcstat_l2_write_not_cacheable); return (B_FALSE); } return (B_TRUE); } static uint64_t l2arc_write_size(void) { uint64_t size; /* * Make sure our globals have meaningful values in case the user * altered them. */ size = l2arc_write_max; if (size == 0) { cmn_err(CE_NOTE, "Bad value for l2arc_write_max, value must " "be greater than zero, resetting it to the default (%d)", L2ARC_WRITE_SIZE); size = l2arc_write_max = L2ARC_WRITE_SIZE; } if (arc_warm == B_FALSE) size += l2arc_write_boost; return (size); } static clock_t l2arc_write_interval(clock_t began, uint64_t wanted, uint64_t wrote) { clock_t interval, next, now; /* * If the ARC lists are busy, increase our write rate; if the * lists are stale, idle back. This is achieved by checking * how much we previously wrote - if it was more than half of * what we wanted, schedule the next write much sooner. */ if (l2arc_feed_again && wrote > (wanted / 2)) interval = (hz * l2arc_feed_min_ms) / 1000; else interval = hz * l2arc_feed_secs; now = ddi_get_lbolt(); next = MAX(now, MIN(now + interval, began + interval)); return (next); } /* * Cycle through L2ARC devices. This is how L2ARC load balances. * If a device is returned, this also returns holding the spa config lock. */ static l2arc_dev_t * l2arc_dev_get_next(void) { l2arc_dev_t *first, *next = NULL; /* * Lock out the removal of spas (spa_namespace_lock), then removal * of cache devices (l2arc_dev_mtx). Once a device has been selected, * both locks will be dropped and a spa config lock held instead. */ mutex_enter(&spa_namespace_lock); mutex_enter(&l2arc_dev_mtx); /* if there are no vdevs, there is nothing to do */ if (l2arc_ndev == 0) goto out; first = NULL; next = l2arc_dev_last; do { /* loop around the list looking for a non-faulted vdev */ if (next == NULL) { next = list_head(l2arc_dev_list); } else { next = list_next(l2arc_dev_list, next); if (next == NULL) next = list_head(l2arc_dev_list); } /* if we have come back to the start, bail out */ if (first == NULL) first = next; else if (next == first) break; } while (vdev_is_dead(next->l2ad_vdev)); /* if we were unable to find any usable vdevs, return NULL */ if (vdev_is_dead(next->l2ad_vdev)) next = NULL; l2arc_dev_last = next; out: mutex_exit(&l2arc_dev_mtx); /* * Grab the config lock to prevent the 'next' device from being * removed while we are writing to it. */ if (next != NULL) spa_config_enter(next->l2ad_spa, SCL_L2ARC, next, RW_READER); mutex_exit(&spa_namespace_lock); return (next); } /* * Free buffers that were tagged for destruction. */ static void l2arc_do_free_on_write() { list_t *buflist; l2arc_data_free_t *df, *df_prev; mutex_enter(&l2arc_free_on_write_mtx); buflist = l2arc_free_on_write; for (df = list_tail(buflist); df; df = df_prev) { df_prev = list_prev(buflist, df); ASSERT(df->l2df_data != NULL); ASSERT(df->l2df_func != NULL); df->l2df_func(df->l2df_data, df->l2df_size); list_remove(buflist, df); kmem_free(df, sizeof (l2arc_data_free_t)); } mutex_exit(&l2arc_free_on_write_mtx); } /* * A write to a cache device has completed. Update all headers to allow * reads from these buffers to begin. */ static void l2arc_write_done(zio_t *zio) { l2arc_write_callback_t *cb; l2arc_dev_t *dev; list_t *buflist; arc_buf_hdr_t *head, *hdr, *hdr_prev; kmutex_t *hash_lock; int64_t bytes_dropped = 0; cb = zio->io_private; ASSERT(cb != NULL); dev = cb->l2wcb_dev; ASSERT(dev != NULL); head = cb->l2wcb_head; ASSERT(head != NULL); buflist = &dev->l2ad_buflist; ASSERT(buflist != NULL); DTRACE_PROBE2(l2arc__iodone, zio_t *, zio, l2arc_write_callback_t *, cb); if (zio->io_error != 0) ARCSTAT_BUMP(arcstat_l2_writes_error); /* * All writes completed, or an error was hit. */ top: mutex_enter(&dev->l2ad_mtx); for (hdr = list_prev(buflist, head); hdr; hdr = hdr_prev) { hdr_prev = list_prev(buflist, hdr); hash_lock = HDR_LOCK(hdr); /* * We cannot use mutex_enter or else we can deadlock * with l2arc_write_buffers (due to swapping the order * the hash lock and l2ad_mtx are taken). */ if (!mutex_tryenter(hash_lock)) { /* * Missed the hash lock. We must retry so we * don't leave the ARC_FLAG_L2_WRITING bit set. */ ARCSTAT_BUMP(arcstat_l2_writes_lock_retry); /* * We don't want to rescan the headers we've * already marked as having been written out, so * we reinsert the head node so we can pick up * where we left off. */ list_remove(buflist, head); list_insert_after(buflist, hdr, head); mutex_exit(&dev->l2ad_mtx); /* * We wait for the hash lock to become available * to try and prevent busy waiting, and increase * the chance we'll be able to acquire the lock * the next time around. */ mutex_enter(hash_lock); mutex_exit(hash_lock); goto top; } /* * We could not have been moved into the arc_l2c_only * state while in-flight due to our ARC_FLAG_L2_WRITING * bit being set. Let's just ensure that's being enforced. */ ASSERT(HDR_HAS_L1HDR(hdr)); /* * We may have allocated a buffer for L2ARC compression, * we must release it to avoid leaking this data. */ l2arc_release_cdata_buf(hdr); if (zio->io_error != 0) { /* * Error - drop L2ARC entry. */ list_remove(buflist, hdr); l2arc_trim(hdr); hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR; ARCSTAT_INCR(arcstat_l2_asize, -hdr->b_l2hdr.b_asize); ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size); bytes_dropped += hdr->b_l2hdr.b_asize; (void) refcount_remove_many(&dev->l2ad_alloc, hdr->b_l2hdr.b_asize, hdr); } /* * Allow ARC to begin reads and ghost list evictions to * this L2ARC entry. */ hdr->b_flags &= ~ARC_FLAG_L2_WRITING; mutex_exit(hash_lock); } atomic_inc_64(&l2arc_writes_done); list_remove(buflist, head); ASSERT(!HDR_HAS_L1HDR(head)); kmem_cache_free(hdr_l2only_cache, head); mutex_exit(&dev->l2ad_mtx); vdev_space_update(dev->l2ad_vdev, -bytes_dropped, 0, 0); l2arc_do_free_on_write(); kmem_free(cb, sizeof (l2arc_write_callback_t)); } /* * A read to a cache device completed. Validate buffer contents before * handing over to the regular ARC routines. */ static void l2arc_read_done(zio_t *zio) { l2arc_read_callback_t *cb; arc_buf_hdr_t *hdr; arc_buf_t *buf; kmutex_t *hash_lock; int equal; ASSERT(zio->io_vd != NULL); ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE); spa_config_exit(zio->io_spa, SCL_L2ARC, zio->io_vd); cb = zio->io_private; ASSERT(cb != NULL); buf = cb->l2rcb_buf; ASSERT(buf != NULL); hash_lock = HDR_LOCK(buf->b_hdr); mutex_enter(hash_lock); hdr = buf->b_hdr; ASSERT3P(hash_lock, ==, HDR_LOCK(hdr)); /* * If the data was read into a temporary buffer, * move it and free the buffer. */ if (cb->l2rcb_data != NULL) { ASSERT3U(hdr->b_size, <, zio->io_size); ASSERT3U(cb->l2rcb_compress, ==, ZIO_COMPRESS_OFF); if (zio->io_error == 0) bcopy(cb->l2rcb_data, buf->b_data, hdr->b_size); /* * The following must be done regardless of whether * there was an error: * - free the temporary buffer * - point zio to the real ARC buffer * - set zio size accordingly * These are required because zio is either re-used for * an I/O of the block in the case of the error * or the zio is passed to arc_read_done() and it * needs real data. */ zio_data_buf_free(cb->l2rcb_data, zio->io_size); zio->io_size = zio->io_orig_size = hdr->b_size; zio->io_data = zio->io_orig_data = buf->b_data; } /* * If the buffer was compressed, decompress it first. */ if (cb->l2rcb_compress != ZIO_COMPRESS_OFF) l2arc_decompress_zio(zio, hdr, cb->l2rcb_compress); ASSERT(zio->io_data != NULL); ASSERT3U(zio->io_size, ==, hdr->b_size); ASSERT3U(BP_GET_LSIZE(&cb->l2rcb_bp), ==, hdr->b_size); /* * Check this survived the L2ARC journey. */ equal = arc_cksum_equal(buf); if (equal && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) { mutex_exit(hash_lock); zio->io_private = buf; zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */ zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */ arc_read_done(zio); } else { mutex_exit(hash_lock); /* * Buffer didn't survive caching. Increment stats and * reissue to the original storage device. */ if (zio->io_error != 0) { ARCSTAT_BUMP(arcstat_l2_io_error); } else { zio->io_error = SET_ERROR(EIO); } if (!equal) ARCSTAT_BUMP(arcstat_l2_cksum_bad); /* * If there's no waiter, issue an async i/o to the primary * storage now. If there *is* a waiter, the caller must * issue the i/o in a context where it's OK to block. */ if (zio->io_waiter == NULL) { zio_t *pio = zio_unique_parent(zio); ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL); zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp, buf->b_data, hdr->b_size, arc_read_done, buf, zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb)); } } kmem_free(cb, sizeof (l2arc_read_callback_t)); } /* * This is the list priority from which the L2ARC will search for pages to * cache. This is used within loops (0..3) to cycle through lists in the * desired order. This order can have a significant effect on cache * performance. * * Currently the metadata lists are hit first, MFU then MRU, followed by * the data lists. This function returns a locked list, and also returns * the lock pointer. */ static multilist_sublist_t * l2arc_sublist_lock(int list_num) { multilist_t *ml = NULL; unsigned int idx; ASSERT(list_num >= 0 && list_num <= 3); switch (list_num) { case 0: ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA]; break; case 1: ml = &arc_mru->arcs_list[ARC_BUFC_METADATA]; break; case 2: ml = &arc_mfu->arcs_list[ARC_BUFC_DATA]; break; case 3: ml = &arc_mru->arcs_list[ARC_BUFC_DATA]; break; } /* * Return a randomly-selected sublist. This is acceptable * because the caller feeds only a little bit of data for each * call (8MB). Subsequent calls will result in different * sublists being selected. */ idx = multilist_get_random_index(ml); return (multilist_sublist_lock(ml, idx)); } /* * Evict buffers from the device write hand to the distance specified in * bytes. This distance may span populated buffers, it may span nothing. * This is clearing a region on the L2ARC device ready for writing. * If the 'all' boolean is set, every buffer is evicted. */ static void l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all) { list_t *buflist; arc_buf_hdr_t *hdr, *hdr_prev; kmutex_t *hash_lock; uint64_t taddr; buflist = &dev->l2ad_buflist; if (!all && dev->l2ad_first) { /* * This is the first sweep through the device. There is * nothing to evict. */ return; } if (dev->l2ad_hand >= (dev->l2ad_end - (2 * distance))) { /* * When nearing the end of the device, evict to the end * before the device write hand jumps to the start. */ taddr = dev->l2ad_end; } else { taddr = dev->l2ad_hand + distance; } DTRACE_PROBE4(l2arc__evict, l2arc_dev_t *, dev, list_t *, buflist, uint64_t, taddr, boolean_t, all); top: mutex_enter(&dev->l2ad_mtx); for (hdr = list_tail(buflist); hdr; hdr = hdr_prev) { hdr_prev = list_prev(buflist, hdr); hash_lock = HDR_LOCK(hdr); /* * We cannot use mutex_enter or else we can deadlock * with l2arc_write_buffers (due to swapping the order * the hash lock and l2ad_mtx are taken). */ if (!mutex_tryenter(hash_lock)) { /* * Missed the hash lock. Retry. */ ARCSTAT_BUMP(arcstat_l2_evict_lock_retry); mutex_exit(&dev->l2ad_mtx); mutex_enter(hash_lock); mutex_exit(hash_lock); goto top; } if (HDR_L2_WRITE_HEAD(hdr)) { /* * We hit a write head node. Leave it for * l2arc_write_done(). */ list_remove(buflist, hdr); mutex_exit(hash_lock); continue; } if (!all && HDR_HAS_L2HDR(hdr) && (hdr->b_l2hdr.b_daddr > taddr || hdr->b_l2hdr.b_daddr < dev->l2ad_hand)) { /* * We've evicted to the target address, * or the end of the device. */ mutex_exit(hash_lock); break; } ASSERT(HDR_HAS_L2HDR(hdr)); if (!HDR_HAS_L1HDR(hdr)) { ASSERT(!HDR_L2_READING(hdr)); /* * This doesn't exist in the ARC. Destroy. * arc_hdr_destroy() will call list_remove() * and decrement arcstat_l2_size. */ arc_change_state(arc_anon, hdr, hash_lock); arc_hdr_destroy(hdr); } else { ASSERT(hdr->b_l1hdr.b_state != arc_l2c_only); ARCSTAT_BUMP(arcstat_l2_evict_l1cached); /* * Invalidate issued or about to be issued * reads, since we may be about to write * over this location. */ if (HDR_L2_READING(hdr)) { ARCSTAT_BUMP(arcstat_l2_evict_reading); hdr->b_flags |= ARC_FLAG_L2_EVICTED; } /* Ensure this header has finished being written */ ASSERT(!HDR_L2_WRITING(hdr)); ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL); arc_hdr_l2hdr_destroy(hdr); } mutex_exit(hash_lock); } mutex_exit(&dev->l2ad_mtx); } /* * Find and write ARC buffers to the L2ARC device. * * An ARC_FLAG_L2_WRITING flag is set so that the L2ARC buffers are not valid * for reading until they have completed writing. * The headroom_boost is an in-out parameter used to maintain headroom boost * state between calls to this function. * * Returns the number of bytes actually written (which may be smaller than * the delta by which the device hand has changed due to alignment). */ static uint64_t l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz, boolean_t *headroom_boost) { arc_buf_hdr_t *hdr, *hdr_prev, *head; uint64_t write_asize, write_sz, headroom, buf_compress_minsz; void *buf_data; boolean_t full; l2arc_write_callback_t *cb; zio_t *pio, *wzio; uint64_t guid = spa_load_guid(spa); const boolean_t do_headroom_boost = *headroom_boost; int try; ASSERT(dev->l2ad_vdev != NULL); /* Lower the flag now, we might want to raise it again later. */ *headroom_boost = B_FALSE; pio = NULL; write_sz = write_asize = 0; full = B_FALSE; head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE); head->b_flags |= ARC_FLAG_L2_WRITE_HEAD; head->b_flags |= ARC_FLAG_HAS_L2HDR; ARCSTAT_BUMP(arcstat_l2_write_buffer_iter); /* * We will want to try to compress buffers that are at least 2x the * device sector size. */ buf_compress_minsz = 2 << dev->l2ad_vdev->vdev_ashift; /* * Copy buffers for L2ARC writing. */ for (try = 0; try <= 3; try++) { multilist_sublist_t *mls = l2arc_sublist_lock(try); uint64_t passed_sz = 0; ARCSTAT_BUMP(arcstat_l2_write_buffer_list_iter); /* * L2ARC fast warmup. * * Until the ARC is warm and starts to evict, read from the * head of the ARC lists rather than the tail. */ if (arc_warm == B_FALSE) hdr = multilist_sublist_head(mls); else hdr = multilist_sublist_tail(mls); if (hdr == NULL) ARCSTAT_BUMP(arcstat_l2_write_buffer_list_null_iter); headroom = target_sz * l2arc_headroom; if (do_headroom_boost) headroom = (headroom * l2arc_headroom_boost) / 100; for (; hdr; hdr = hdr_prev) { kmutex_t *hash_lock; uint64_t buf_sz; uint64_t buf_a_sz; size_t align; if (arc_warm == B_FALSE) hdr_prev = multilist_sublist_next(mls, hdr); else hdr_prev = multilist_sublist_prev(mls, hdr); ARCSTAT_INCR(arcstat_l2_write_buffer_bytes_scanned, hdr->b_size); hash_lock = HDR_LOCK(hdr); if (!mutex_tryenter(hash_lock)) { ARCSTAT_BUMP(arcstat_l2_write_trylock_fail); /* * Skip this buffer rather than waiting. */ continue; } passed_sz += hdr->b_size; if (passed_sz > headroom) { /* * Searched too far. */ mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_l2_write_passed_headroom); break; } if (!l2arc_write_eligible(guid, hdr)) { mutex_exit(hash_lock); continue; } /* * Assume that the buffer is not going to be compressed * and could take more space on disk because of a larger * disk block size. */ buf_sz = hdr->b_size; align = (size_t)1 << dev->l2ad_vdev->vdev_ashift; buf_a_sz = P2ROUNDUP(buf_sz, align); if ((write_asize + buf_a_sz) > target_sz) { full = B_TRUE; mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_l2_write_full); break; } if (pio == NULL) { /* * Insert a dummy header on the buflist so * l2arc_write_done() can find where the * write buffers begin without searching. */ mutex_enter(&dev->l2ad_mtx); list_insert_head(&dev->l2ad_buflist, head); mutex_exit(&dev->l2ad_mtx); cb = kmem_alloc( sizeof (l2arc_write_callback_t), KM_SLEEP); cb->l2wcb_dev = dev; cb->l2wcb_head = head; pio = zio_root(spa, l2arc_write_done, cb, ZIO_FLAG_CANFAIL); ARCSTAT_BUMP(arcstat_l2_write_pios); } /* * Create and add a new L2ARC header. */ hdr->b_l2hdr.b_dev = dev; hdr->b_flags |= ARC_FLAG_L2_WRITING; /* * Temporarily stash the data buffer in b_tmp_cdata. * The subsequent write step will pick it up from * there. This is because can't access b_l1hdr.b_buf * without holding the hash_lock, which we in turn * can't access without holding the ARC list locks * (which we want to avoid during compression/writing). */ hdr->b_l2hdr.b_compress = ZIO_COMPRESS_OFF; hdr->b_l2hdr.b_asize = hdr->b_size; hdr->b_l1hdr.b_tmp_cdata = hdr->b_l1hdr.b_buf->b_data; /* * Explicitly set the b_daddr field to a known * value which means "invalid address". This * enables us to differentiate which stage of * l2arc_write_buffers() the particular header * is in (e.g. this loop, or the one below). * ARC_FLAG_L2_WRITING is not enough to make * this distinction, and we need to know in * order to do proper l2arc vdev accounting in * arc_release() and arc_hdr_destroy(). * * Note, we can't use a new flag to distinguish * the two stages because we don't hold the * header's hash_lock below, in the second stage * of this function. Thus, we can't simply * change the b_flags field to denote that the * IO has been sent. We can change the b_daddr * field of the L2 portion, though, since we'll * be holding the l2ad_mtx; which is why we're * using it to denote the header's state change. */ hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET; hdr->b_flags |= ARC_FLAG_HAS_L2HDR; mutex_enter(&dev->l2ad_mtx); list_insert_head(&dev->l2ad_buflist, hdr); mutex_exit(&dev->l2ad_mtx); /* * Compute and store the buffer cksum before * writing. On debug the cksum is verified first. */ arc_cksum_verify(hdr->b_l1hdr.b_buf); arc_cksum_compute(hdr->b_l1hdr.b_buf, B_TRUE); mutex_exit(hash_lock); write_sz += buf_sz; write_asize += buf_a_sz; } multilist_sublist_unlock(mls); if (full == B_TRUE) break; } /* No buffers selected for writing? */ if (pio == NULL) { ASSERT0(write_sz); ASSERT(!HDR_HAS_L1HDR(head)); kmem_cache_free(hdr_l2only_cache, head); return (0); } mutex_enter(&dev->l2ad_mtx); /* * Now start writing the buffers. We're starting at the write head * and work backwards, retracing the course of the buffer selector * loop above. */ write_asize = 0; for (hdr = list_prev(&dev->l2ad_buflist, head); hdr; hdr = list_prev(&dev->l2ad_buflist, hdr)) { uint64_t buf_sz; boolean_t compress; /* * We rely on the L1 portion of the header below, so * it's invalid for this header to have been evicted out * of the ghost cache, prior to being written out. The * ARC_FLAG_L2_WRITING bit ensures this won't happen. */ ASSERT(HDR_HAS_L1HDR(hdr)); /* * We shouldn't need to lock the buffer here, since we flagged * it as ARC_FLAG_L2_WRITING in the previous step, but we must * take care to only access its L2 cache parameters. In * particular, hdr->l1hdr.b_buf may be invalid by now due to * ARC eviction. */ hdr->b_l2hdr.b_daddr = dev->l2ad_hand; /* * Save a pointer to the original buffer data we had previously * stashed away. */ buf_data = hdr->b_l1hdr.b_tmp_cdata; compress = HDR_L2COMPRESS(hdr) && hdr->b_l2hdr.b_asize >= buf_compress_minsz; if (l2arc_transform_buf(hdr, compress)) { /* * If compression succeeded, enable headroom * boost on the next scan cycle. */ *headroom_boost = B_TRUE; } /* * Get the new buffer size that accounts for compression * and padding. */ buf_sz = hdr->b_l2hdr.b_asize; /* * We need to do this regardless if buf_sz is zero or * not, otherwise, when this l2hdr is evicted we'll * remove a reference that was never added. */ (void) refcount_add_many(&dev->l2ad_alloc, buf_sz, hdr); /* Compression may have squashed the buffer to zero length. */ if (buf_sz != 0) { /* * If the data was padded or compressed, then it * it is in a new buffer. */ if (hdr->b_l1hdr.b_tmp_cdata != NULL) buf_data = hdr->b_l1hdr.b_tmp_cdata; wzio = zio_write_phys(pio, dev->l2ad_vdev, dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF, NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_CANFAIL, B_FALSE); DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev, zio_t *, wzio); (void) zio_nowait(wzio); write_asize += buf_sz; dev->l2ad_hand += buf_sz; } } mutex_exit(&dev->l2ad_mtx); ASSERT3U(write_asize, <=, target_sz); ARCSTAT_BUMP(arcstat_l2_writes_sent); ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize); ARCSTAT_INCR(arcstat_l2_size, write_sz); ARCSTAT_INCR(arcstat_l2_asize, write_asize); vdev_space_update(dev->l2ad_vdev, write_asize, 0, 0); /* * Bump device hand to the device start if it is approaching the end. * l2arc_evict() will already have evicted ahead for this case. */ if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) { dev->l2ad_hand = dev->l2ad_start; dev->l2ad_first = B_FALSE; } dev->l2ad_writing = B_TRUE; (void) zio_wait(pio); dev->l2ad_writing = B_FALSE; return (write_asize); } /* * Transforms, possibly compresses and pads, an L2ARC buffer. * The data to be compressed must be prefilled in l1hdr.b_tmp_cdata and its * size in l2hdr->b_asize. This routine tries to compress the data and * depending on the compression result there are three possible outcomes: * *) The buffer was incompressible. The buffer size was already ashift aligned. * The original hdr contents were left untouched except for b_tmp_cdata, * which is reset to NULL. The caller must keep a pointer to the original * data. * *) The buffer was incompressible. The buffer size was not ashift aligned. * b_tmp_cdata was replaced with a temporary data buffer which holds a padded * (aligned) copy of the data. Once writing is done, invoke * l2arc_release_cdata_buf on this hdr to free the temporary buffer. * *) The buffer was all-zeros, so there is no need to write it to an L2 * device. To indicate this situation b_tmp_cdata is NULL'ed, b_asize is * set to zero and b_compress is set to ZIO_COMPRESS_EMPTY. * *) Compression succeeded and b_tmp_cdata was replaced with a temporary * data buffer which holds the compressed data to be written, and b_asize * tells us how much data there is. b_compress is set to the appropriate * compression algorithm. Once writing is done, invoke * l2arc_release_cdata_buf on this l2hdr to free this temporary buffer. * * Returns B_TRUE if compression succeeded, or B_FALSE if it didn't (the * buffer was incompressible). */ static boolean_t l2arc_transform_buf(arc_buf_hdr_t *hdr, boolean_t compress) { void *cdata; size_t align, asize, csize, len, rounded; ASSERT(HDR_HAS_L2HDR(hdr)); l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr; ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT3S(l2hdr->b_compress, ==, ZIO_COMPRESS_OFF); ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL); len = l2hdr->b_asize; align = (size_t)1 << l2hdr->b_dev->l2ad_vdev->vdev_ashift; asize = P2ROUNDUP(len, align); cdata = zio_data_buf_alloc(asize); ASSERT3P(cdata, !=, NULL); if (compress) csize = zio_compress_data(ZIO_COMPRESS_LZ4, hdr->b_l1hdr.b_tmp_cdata, cdata, len); else csize = len; if (csize == 0) { /* zero block, indicate that there's nothing to write */ zio_data_buf_free(cdata, asize); l2hdr->b_compress = ZIO_COMPRESS_EMPTY; l2hdr->b_asize = 0; hdr->b_l1hdr.b_tmp_cdata = NULL; ARCSTAT_BUMP(arcstat_l2_compress_zeros); return (B_TRUE); } rounded = P2ROUNDUP(csize, align); ASSERT3U(rounded, <=, asize); if (rounded < len) { /* * Compression succeeded, we'll keep the cdata around for * writing and release it afterwards. */ if (rounded > csize) { bzero((char *)cdata + csize, rounded - csize); csize = rounded; } l2hdr->b_compress = ZIO_COMPRESS_LZ4; l2hdr->b_asize = csize; hdr->b_l1hdr.b_tmp_cdata = cdata; ARCSTAT_BUMP(arcstat_l2_compress_successes); return (B_TRUE); } else { /* * Compression did not save space. */ if (P2PHASE(len, align) != 0) { /* * Use compression buffer for a copy of data padded to * the proper size. Compression algorithm remains set * to ZIO_COMPRESS_OFF. */ ASSERT3U(len, <, asize); bcopy(hdr->b_l1hdr.b_tmp_cdata, cdata, len); bzero((char *)cdata + len, asize - len); l2hdr->b_asize = asize; hdr->b_l1hdr.b_tmp_cdata = cdata; ARCSTAT_BUMP(arcstat_l2_padding_needed); } else { ASSERT3U(len, ==, asize); /* * The original buffer is good as is, * release the compressed buffer. * l2hdr will be left unmodified except for b_tmp_cdata. */ zio_data_buf_free(cdata, asize); hdr->b_l1hdr.b_tmp_cdata = NULL; } if (compress) ARCSTAT_BUMP(arcstat_l2_compress_failures); return (B_FALSE); } } /* * Decompresses a zio read back from an l2arc device. On success, the * underlying zio's io_data buffer is overwritten by the uncompressed * version. On decompression error (corrupt compressed stream), the * zio->io_error value is set to signal an I/O error. * * Please note that the compressed data stream is not checksummed, so * if the underlying device is experiencing data corruption, we may feed * corrupt data to the decompressor, so the decompressor needs to be * able to handle this situation (LZ4 does). */ static void l2arc_decompress_zio(zio_t *zio, arc_buf_hdr_t *hdr, enum zio_compress c) { ASSERT(L2ARC_IS_VALID_COMPRESS(c)); if (zio->io_error != 0) { /* * An io error has occured, just restore the original io * size in preparation for a main pool read. */ zio->io_orig_size = zio->io_size = hdr->b_size; return; } if (c == ZIO_COMPRESS_EMPTY) { /* * An empty buffer results in a null zio, which means we * need to fill its io_data after we're done restoring the * buffer's contents. */ ASSERT(hdr->b_l1hdr.b_buf != NULL); bzero(hdr->b_l1hdr.b_buf->b_data, hdr->b_size); zio->io_data = zio->io_orig_data = hdr->b_l1hdr.b_buf->b_data; } else { ASSERT(zio->io_data != NULL); /* * We copy the compressed data from the start of the arc buffer * (the zio_read will have pulled in only what we need, the * rest is garbage which we will overwrite at decompression) * and then decompress back to the ARC data buffer. This way we * can minimize copying by simply decompressing back over the * original compressed data (rather than decompressing to an * aux buffer and then copying back the uncompressed buffer, * which is likely to be much larger). */ uint64_t csize; void *cdata; csize = zio->io_size; cdata = zio_data_buf_alloc(csize); bcopy(zio->io_data, cdata, csize); if (zio_decompress_data(c, cdata, zio->io_data, csize, hdr->b_size) != 0) zio->io_error = EIO; zio_data_buf_free(cdata, csize); } /* Restore the expected uncompressed IO size. */ zio->io_orig_size = zio->io_size = hdr->b_size; } /* * Releases the temporary b_tmp_cdata buffer in an l2arc header structure. * This buffer serves as a temporary holder of compressed or padded data while * the buffer entry is being written to an l2arc device. Once that is * done, we can dispose of it. */ static void l2arc_release_cdata_buf(arc_buf_hdr_t *hdr) { size_t align, asize, len; enum zio_compress comp = hdr->b_l2hdr.b_compress; ASSERT(HDR_HAS_L2HDR(hdr)); ASSERT(HDR_HAS_L1HDR(hdr)); ASSERT(comp == ZIO_COMPRESS_OFF || L2ARC_IS_VALID_COMPRESS(comp)); if (hdr->b_l1hdr.b_tmp_cdata != NULL) { ASSERT(comp != ZIO_COMPRESS_EMPTY); len = hdr->b_size; align = (size_t)1 << hdr->b_l2hdr.b_dev->l2ad_vdev->vdev_ashift; asize = P2ROUNDUP(len, align); zio_data_buf_free(hdr->b_l1hdr.b_tmp_cdata, asize); hdr->b_l1hdr.b_tmp_cdata = NULL; } else { ASSERT(comp == ZIO_COMPRESS_OFF || comp == ZIO_COMPRESS_EMPTY); } } /* * This thread feeds the L2ARC at regular intervals. This is the beating * heart of the L2ARC. */ static void l2arc_feed_thread(void *dummy __unused) { callb_cpr_t cpr; l2arc_dev_t *dev; spa_t *spa; uint64_t size, wrote; clock_t begin, next = ddi_get_lbolt(); boolean_t headroom_boost = B_FALSE; CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG); mutex_enter(&l2arc_feed_thr_lock); while (l2arc_thread_exit == 0) { CALLB_CPR_SAFE_BEGIN(&cpr); (void) cv_timedwait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock, next - ddi_get_lbolt()); CALLB_CPR_SAFE_END(&cpr, &l2arc_feed_thr_lock); next = ddi_get_lbolt() + hz; /* * Quick check for L2ARC devices. */ mutex_enter(&l2arc_dev_mtx); if (l2arc_ndev == 0) { mutex_exit(&l2arc_dev_mtx); continue; } mutex_exit(&l2arc_dev_mtx); begin = ddi_get_lbolt(); /* * This selects the next l2arc device to write to, and in * doing so the next spa to feed from: dev->l2ad_spa. This * will return NULL if there are now no l2arc devices or if * they are all faulted. * * If a device is returned, its spa's config lock is also * held to prevent device removal. l2arc_dev_get_next() * will grab and release l2arc_dev_mtx. */ if ((dev = l2arc_dev_get_next()) == NULL) continue; spa = dev->l2ad_spa; ASSERT(spa != NULL); /* * If the pool is read-only then force the feed thread to * sleep a little longer. */ if (!spa_writeable(spa)) { next = ddi_get_lbolt() + 5 * l2arc_feed_secs * hz; spa_config_exit(spa, SCL_L2ARC, dev); continue; } /* * Avoid contributing to memory pressure. */ if (arc_reclaim_needed()) { ARCSTAT_BUMP(arcstat_l2_abort_lowmem); spa_config_exit(spa, SCL_L2ARC, dev); continue; } ARCSTAT_BUMP(arcstat_l2_feeds); size = l2arc_write_size(); /* * Evict L2ARC buffers that will be overwritten. */ l2arc_evict(dev, size, B_FALSE); /* * Write ARC buffers. */ wrote = l2arc_write_buffers(spa, dev, size, &headroom_boost); /* * Calculate interval between writes. */ next = l2arc_write_interval(begin, size, wrote); spa_config_exit(spa, SCL_L2ARC, dev); } l2arc_thread_exit = 0; cv_broadcast(&l2arc_feed_thr_cv); CALLB_CPR_EXIT(&cpr); /* drops l2arc_feed_thr_lock */ thread_exit(); } boolean_t l2arc_vdev_present(vdev_t *vd) { l2arc_dev_t *dev; mutex_enter(&l2arc_dev_mtx); for (dev = list_head(l2arc_dev_list); dev != NULL; dev = list_next(l2arc_dev_list, dev)) { if (dev->l2ad_vdev == vd) break; } mutex_exit(&l2arc_dev_mtx); return (dev != NULL); } /* * Add a vdev for use by the L2ARC. By this point the spa has already * validated the vdev and opened it. */ void l2arc_add_vdev(spa_t *spa, vdev_t *vd) { l2arc_dev_t *adddev; ASSERT(!l2arc_vdev_present(vd)); vdev_ashift_optimize(vd); /* * Create a new l2arc device entry. */ adddev = kmem_zalloc(sizeof (l2arc_dev_t), KM_SLEEP); adddev->l2ad_spa = spa; adddev->l2ad_vdev = vd; adddev->l2ad_start = VDEV_LABEL_START_SIZE; adddev->l2ad_end = VDEV_LABEL_START_SIZE + vdev_get_min_asize(vd); adddev->l2ad_hand = adddev->l2ad_start; adddev->l2ad_first = B_TRUE; adddev->l2ad_writing = B_FALSE; mutex_init(&adddev->l2ad_mtx, NULL, MUTEX_DEFAULT, NULL); /* * This is a list of all ARC buffers that are still valid on the * device. */ list_create(&adddev->l2ad_buflist, sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_l2hdr.b_l2node)); vdev_space_update(vd, 0, 0, adddev->l2ad_end - adddev->l2ad_hand); refcount_create(&adddev->l2ad_alloc); /* * Add device to global list */ mutex_enter(&l2arc_dev_mtx); list_insert_head(l2arc_dev_list, adddev); atomic_inc_64(&l2arc_ndev); mutex_exit(&l2arc_dev_mtx); } /* * Remove a vdev from the L2ARC. */ void l2arc_remove_vdev(vdev_t *vd) { l2arc_dev_t *dev, *nextdev, *remdev = NULL; /* * Find the device by vdev */ mutex_enter(&l2arc_dev_mtx); for (dev = list_head(l2arc_dev_list); dev; dev = nextdev) { nextdev = list_next(l2arc_dev_list, dev); if (vd == dev->l2ad_vdev) { remdev = dev; break; } } ASSERT(remdev != NULL); /* * Remove device from global list */ list_remove(l2arc_dev_list, remdev); l2arc_dev_last = NULL; /* may have been invalidated */ atomic_dec_64(&l2arc_ndev); mutex_exit(&l2arc_dev_mtx); /* * Clear all buflists and ARC references. L2ARC device flush. */ l2arc_evict(remdev, 0, B_TRUE); list_destroy(&remdev->l2ad_buflist); mutex_destroy(&remdev->l2ad_mtx); refcount_destroy(&remdev->l2ad_alloc); kmem_free(remdev, sizeof (l2arc_dev_t)); } void l2arc_init(void) { l2arc_thread_exit = 0; l2arc_ndev = 0; l2arc_writes_sent = 0; l2arc_writes_done = 0; mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL); mutex_init(&l2arc_dev_mtx, NULL, MUTEX_DEFAULT, NULL); mutex_init(&l2arc_free_on_write_mtx, NULL, MUTEX_DEFAULT, NULL); l2arc_dev_list = &L2ARC_dev_list; l2arc_free_on_write = &L2ARC_free_on_write; list_create(l2arc_dev_list, sizeof (l2arc_dev_t), offsetof(l2arc_dev_t, l2ad_node)); list_create(l2arc_free_on_write, sizeof (l2arc_data_free_t), offsetof(l2arc_data_free_t, l2df_list_node)); } void l2arc_fini(void) { /* * This is called from dmu_fini(), which is called from spa_fini(); * Because of this, we can assume that all l2arc devices have * already been removed when the pools themselves were removed. */ l2arc_do_free_on_write(); mutex_destroy(&l2arc_feed_thr_lock); cv_destroy(&l2arc_feed_thr_cv); mutex_destroy(&l2arc_dev_mtx); mutex_destroy(&l2arc_free_on_write_mtx); list_destroy(l2arc_dev_list); list_destroy(l2arc_free_on_write); } void l2arc_start(void) { if (!(spa_mode_global & FWRITE)) return; (void) thread_create(NULL, 0, l2arc_feed_thread, NULL, 0, &p0, TS_RUN, minclsyspri); } void l2arc_stop(void) { if (!(spa_mode_global & FWRITE)) return; mutex_enter(&l2arc_feed_thr_lock); cv_signal(&l2arc_feed_thr_cv); /* kick thread out of startup */ l2arc_thread_exit = 1; while (l2arc_thread_exit != 0) cv_wait(&l2arc_feed_thr_cv, &l2arc_feed_thr_lock); mutex_exit(&l2arc_feed_thr_lock); } Index: projects/vnet/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c =================================================================== --- projects/vnet/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c (revision 302276) +++ projects/vnet/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c (revision 302277) @@ -1,1172 +1,1172 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2014 by Delphix. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) && defined(_KERNEL) -#include #include +#include #endif /* * ZFS Write Throttle * ------------------ * * ZFS must limit the rate of incoming writes to the rate at which it is able * to sync data modifications to the backend storage. Throttling by too much * creates an artificial limit; throttling by too little can only be sustained * for short periods and would lead to highly lumpy performance. On a per-pool * basis, ZFS tracks the amount of modified (dirty) data. As operations change * data, the amount of dirty data increases; as ZFS syncs out data, the amount * of dirty data decreases. When the amount of dirty data exceeds a * predetermined threshold further modifications are blocked until the amount * of dirty data decreases (as data is synced out). * * The limit on dirty data is tunable, and should be adjusted according to * both the IO capacity and available memory of the system. The larger the * window, the more ZFS is able to aggregate and amortize metadata (and data) * changes. However, memory is a limited resource, and allowing for more dirty * data comes at the cost of keeping other useful data in memory (for example * ZFS data cached by the ARC). * * Implementation * * As buffers are modified dsl_pool_willuse_space() increments both the per- * txg (dp_dirty_pertxg[]) and poolwide (dp_dirty_total) accounting of * dirty space used; dsl_pool_dirty_space() decrements those values as data * is synced out from dsl_pool_sync(). While only the poolwide value is * relevant, the per-txg value is useful for debugging. The tunable * zfs_dirty_data_max determines the dirty space limit. Once that value is * exceeded, new writes are halted until space frees up. * * The zfs_dirty_data_sync tunable dictates the threshold at which we * ensure that there is a txg syncing (see the comment in txg.c for a full * description of transaction group stages). * * The IO scheduler uses both the dirty space limit and current amount of * dirty data as inputs. Those values affect the number of concurrent IOs ZFS * issues. See the comment in vdev_queue.c for details of the IO scheduler. * * The delay is also calculated based on the amount of dirty data. See the * comment above dmu_tx_delay() for details. */ /* * zfs_dirty_data_max will be set to zfs_dirty_data_max_percent% of all memory, * capped at zfs_dirty_data_max_max. It can also be overridden in /etc/system. */ uint64_t zfs_dirty_data_max; uint64_t zfs_dirty_data_max_max = 4ULL * 1024 * 1024 * 1024; int zfs_dirty_data_max_percent = 10; /* * If there is at least this much dirty data, push out a txg. */ uint64_t zfs_dirty_data_sync = 64 * 1024 * 1024; /* * Once there is this amount of dirty data, the dmu_tx_delay() will kick in * and delay each transaction. * This value should be >= zfs_vdev_async_write_active_max_dirty_percent. */ int zfs_delay_min_dirty_percent = 60; /* * This controls how quickly the delay approaches infinity. * Larger values cause it to delay more for a given amount of dirty data. * Therefore larger values will cause there to be less dirty data for a * given throughput. * * For the smoothest delay, this value should be about 1 billion divided * by the maximum number of operations per second. This will smoothly * handle between 10x and 1/10th this number. * * Note: zfs_delay_scale * zfs_dirty_data_max must be < 2^64, due to the * multiply in dmu_tx_delay(). */ uint64_t zfs_delay_scale = 1000 * 1000 * 1000 / 2000; #if defined(__FreeBSD__) && defined(_KERNEL) extern int zfs_vdev_async_write_active_max_dirty_percent; SYSCTL_DECL(_vfs_zfs); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_max, CTLFLAG_RWTUN, &zfs_dirty_data_max, 0, "The maximum amount of dirty data in bytes after which new writes are " "halted until space becomes available"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_max_max, CTLFLAG_RDTUN, &zfs_dirty_data_max_max, 0, "The absolute cap on dirty_data_max when auto calculating"); static int sysctl_zfs_dirty_data_max_percent(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vfs_zfs, OID_AUTO, dirty_data_max_percent, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), sysctl_zfs_dirty_data_max_percent, "I", "The percent of physical memory used to auto calculate dirty_data_max"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, dirty_data_sync, CTLFLAG_RWTUN, &zfs_dirty_data_sync, 0, "Force a txg if the number of dirty buffer bytes exceed this value"); static int sysctl_zfs_delay_min_dirty_percent(SYSCTL_HANDLER_ARGS); /* No zfs_delay_min_dirty_percent tunable due to limit requirements */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, delay_min_dirty_percent, CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(int), sysctl_zfs_delay_min_dirty_percent, "I", "The limit of outstanding dirty data before transations are delayed"); static int sysctl_zfs_delay_scale(SYSCTL_HANDLER_ARGS); /* No zfs_delay_scale tunable due to limit requirements */ SYSCTL_PROC(_vfs_zfs, OID_AUTO, delay_scale, CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t), sysctl_zfs_delay_scale, "QU", "Controls how quickly the delay approaches infinity"); static int sysctl_zfs_dirty_data_max_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_dirty_data_max_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < 0 || val > 100) return (EINVAL); zfs_dirty_data_max_percent = val; return (0); } static int sysctl_zfs_delay_min_dirty_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_delay_min_dirty_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < zfs_vdev_async_write_active_max_dirty_percent) return (EINVAL); zfs_delay_min_dirty_percent = val; return (0); } static int sysctl_zfs_delay_scale(SYSCTL_HANDLER_ARGS) { uint64_t val; int err; val = zfs_delay_scale; err = sysctl_handle_64(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val > UINT64_MAX / zfs_dirty_data_max) return (EINVAL); zfs_delay_scale = val; return (0); } #endif hrtime_t zfs_throttle_delay = MSEC2NSEC(10); hrtime_t zfs_throttle_resolution = MSEC2NSEC(10); int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp) { uint64_t obj; int err; err = zap_lookup(dp->dp_meta_objset, dsl_dir_phys(dp->dp_root_dir)->dd_child_dir_zapobj, name, sizeof (obj), 1, &obj); if (err) return (err); return (dsl_dir_hold_obj(dp, obj, name, dp, ddp)); } static dsl_pool_t * dsl_pool_open_impl(spa_t *spa, uint64_t txg) { dsl_pool_t *dp; blkptr_t *bp = spa_get_rootblkptr(spa); dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); dp->dp_spa = spa; dp->dp_meta_rootbp = *bp; rrw_init(&dp->dp_config_rwlock, B_TRUE); txg_init(dp, txg); txg_list_create(&dp->dp_dirty_datasets, offsetof(dsl_dataset_t, ds_dirty_link)); txg_list_create(&dp->dp_dirty_zilogs, offsetof(zilog_t, zl_dirty_link)); txg_list_create(&dp->dp_dirty_dirs, offsetof(dsl_dir_t, dd_dirty_link)); txg_list_create(&dp->dp_sync_tasks, offsetof(dsl_sync_task_t, dst_node)); mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL); dp->dp_vnrele_taskq = taskq_create("zfs_vn_rele_taskq", 1, minclsyspri, 1, 4, 0); return (dp); } int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &dp->dp_meta_objset); if (err != 0) dsl_pool_close(dp); else *dpp = dp; return (err); } int dsl_pool_open(dsl_pool_t *dp) { int err; dsl_dir_t *dd; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &dp->dp_root_dir_obj); if (err) goto out; err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir); if (err) goto out; err = dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir); if (err) goto out; if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) { err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd); if (err) goto out; err = dsl_dataset_hold_obj(dp, dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds); if (err == 0) { err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, dp, &dp->dp_origin_snap); dsl_dataset_rele(ds, FTAG); } dsl_dir_rele(dd, dp); if (err) goto out; } if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) { err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir); if (err) goto out; err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj); if (err) goto out; VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } /* * Note: errors ignored, because the leak dir will not exist if we * have not encountered a leak yet. */ (void) dsl_pool_open_special_dir(dp, LEAK_DIR_NAME, &dp->dp_leak_dir); if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, &dp->dp_bptree_obj); if (err != 0) goto out; } if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMPTY_BPOBJ)) { err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1, &dp->dp_empty_bpobj); if (err != 0) goto out; } err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj); if (err == ENOENT) err = 0; if (err) goto out; err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg); out: rrw_exit(&dp->dp_config_rwlock, FTAG); return (err); } void dsl_pool_close(dsl_pool_t *dp) { /* * Drop our references from dsl_pool_open(). * * Since we held the origin_snap from "syncing" context (which * includes pool-opening context), it actually only got a "ref" * and not a hold, so just drop that here. */ if (dp->dp_origin_snap) dsl_dataset_rele(dp->dp_origin_snap, dp); if (dp->dp_mos_dir) dsl_dir_rele(dp->dp_mos_dir, dp); if (dp->dp_free_dir) dsl_dir_rele(dp->dp_free_dir, dp); if (dp->dp_leak_dir) dsl_dir_rele(dp->dp_leak_dir, dp); if (dp->dp_root_dir) dsl_dir_rele(dp->dp_root_dir, dp); bpobj_close(&dp->dp_free_bpobj); /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ if (dp->dp_meta_objset) dmu_objset_evict(dp->dp_meta_objset); txg_list_destroy(&dp->dp_dirty_datasets); txg_list_destroy(&dp->dp_dirty_zilogs); txg_list_destroy(&dp->dp_sync_tasks); txg_list_destroy(&dp->dp_dirty_dirs); /* * We can't set retry to TRUE since we're explicitly specifying * a spa to flush. This is good enough; any missed buffers for * this spa won't cause trouble, and they'll eventually fall * out of the ARC just like any other unused buffer. */ arc_flush(dp->dp_spa, FALSE); txg_fini(dp); dsl_scan_fini(dp); dmu_buf_user_evict_wait(); rrw_destroy(&dp->dp_config_rwlock); mutex_destroy(&dp->dp_lock); taskq_destroy(dp->dp_vnrele_taskq); if (dp->dp_blkstats) kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t)); kmem_free(dp, sizeof (dsl_pool_t)); } dsl_pool_t * dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); objset_t *os; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); ASSERT0(err); /* Initialize scan structures */ VERIFY0(dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); /* create the root dataset */ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); /* create the root objset */ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); #ifdef _KERNEL zfs_create_fs(os, kcred, zplprops, tx); #endif dsl_dataset_rele(ds, FTAG); dmu_tx_commit(tx); rrw_exit(&dp->dp_config_rwlock, FTAG); return (dp); } /* * Account for the meta-objset space in its placeholder dsl_dir. */ void dsl_pool_mos_diduse_space(dsl_pool_t *dp, int64_t used, int64_t comp, int64_t uncomp) { ASSERT3U(comp, ==, uncomp); /* it's all metadata */ mutex_enter(&dp->dp_lock); dp->dp_mos_used_delta += used; dp->dp_mos_compressed_delta += comp; dp->dp_mos_uncompressed_delta += uncomp; mutex_exit(&dp->dp_lock); } static int deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { dsl_deadlist_t *dl = arg; dsl_deadlist_insert(dl, bp, tx); return (0); } static void dsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx) { zio_t *zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); dmu_objset_sync(dp->dp_meta_objset, zio, tx); VERIFY0(zio_wait(zio)); dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); } static void dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta) { ASSERT(MUTEX_HELD(&dp->dp_lock)); if (delta < 0) ASSERT3U(-delta, <=, dp->dp_dirty_total); dp->dp_dirty_total += delta; /* * Note: we signal even when increasing dp_dirty_total. * This ensures forward progress -- each thread wakes the next waiter. */ if (dp->dp_dirty_total <= zfs_dirty_data_max) cv_signal(&dp->dp_spaceavail_cv); } void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) { zio_t *zio; dmu_tx_t *tx; dsl_dir_t *dd; dsl_dataset_t *ds; objset_t *mos = dp->dp_meta_objset; list_t synced_datasets; list_create(&synced_datasets, sizeof (dsl_dataset_t), offsetof(dsl_dataset_t, ds_synced_link)); tx = dmu_tx_create_assigned(dp, txg); /* * Write out all dirty blocks of dirty datasets. */ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) { /* * We must not sync any non-MOS datasets twice, because * we may have taken a snapshot of them. However, we * may sync newly-created datasets on pass 2. */ ASSERT(!list_link_active(&ds->ds_synced_link)); list_insert_tail(&synced_datasets, ds); dsl_dataset_sync(ds, zio, tx); } VERIFY0(zio_wait(zio)); /* * We have written all of the accounted dirty data, so our * dp_space_towrite should now be zero. However, some seldom-used * code paths do not adhere to this (e.g. dbuf_undirty(), also * rounding error in dbuf_write_physdone). * Shore up the accounting of any dirtied space now. */ dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg); /* * After the data blocks have been written (ensured by the zio_wait() * above), update the user/group space accounting. */ for (ds = list_head(&synced_datasets); ds != NULL; ds = list_next(&synced_datasets, ds)) { dmu_objset_do_userquota_updates(ds->ds_objset, tx); } /* * Sync the datasets again to push out the changes due to * userspace updates. This must be done before we process the * sync tasks, so that any snapshots will have the correct * user accounting information (and we won't get confused * about which blocks are part of the snapshot). */ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); while ((ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) != NULL) { ASSERT(list_link_active(&ds->ds_synced_link)); dmu_buf_rele(ds->ds_dbuf, ds); dsl_dataset_sync(ds, zio, tx); } VERIFY0(zio_wait(zio)); /* * Now that the datasets have been completely synced, we can * clean up our in-memory structures accumulated while syncing: * * - move dead blocks from the pending deadlist to the on-disk deadlist * - release hold from dsl_dataset_dirty() */ while ((ds = list_remove_head(&synced_datasets)) != NULL) { objset_t *os = ds->ds_objset; bplist_iterate(&ds->ds_pending_deadlist, deadlist_enqueue_cb, &ds->ds_deadlist, tx); ASSERT(!dmu_objset_is_dirty(os, txg)); dmu_buf_rele(ds->ds_dbuf, ds); } while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) { dsl_dir_sync(dd, tx); } /* * The MOS's space is accounted for in the pool/$MOS * (dp_mos_dir). We can't modify the mos while we're syncing * it, so we remember the deltas and apply them here. */ if (dp->dp_mos_used_delta != 0 || dp->dp_mos_compressed_delta != 0 || dp->dp_mos_uncompressed_delta != 0) { dsl_dir_diduse_space(dp->dp_mos_dir, DD_USED_HEAD, dp->dp_mos_used_delta, dp->dp_mos_compressed_delta, dp->dp_mos_uncompressed_delta, tx); dp->dp_mos_used_delta = 0; dp->dp_mos_compressed_delta = 0; dp->dp_mos_uncompressed_delta = 0; } if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL || list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) { dsl_pool_sync_mos(dp, tx); } /* * If we modify a dataset in the same txg that we want to destroy it, * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it. * dsl_dir_destroy_check() will fail if there are unexpected holds. * Therefore, we want to sync the MOS (thus syncing the dd_dbuf * and clearing the hold on it) before we process the sync_tasks. * The MOS data dirtied by the sync_tasks will be synced on the next * pass. */ if (!txg_list_empty(&dp->dp_sync_tasks, txg)) { dsl_sync_task_t *dst; /* * No more sync tasks should have been added while we * were syncing. */ ASSERT3U(spa_sync_pass(dp->dp_spa), ==, 1); while ((dst = txg_list_remove(&dp->dp_sync_tasks, txg)) != NULL) dsl_sync_task_sync(dst, tx); } dmu_tx_commit(tx); DTRACE_PROBE2(dsl_pool_sync__done, dsl_pool_t *dp, dp, uint64_t, txg); } void dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg) { zilog_t *zilog; while (zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg)) { dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os); zil_clean(zilog, txg); ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg)); dmu_buf_rele(ds->ds_dbuf, zilog); } ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg)); } /* * TRUE if the current thread is the tx_sync_thread or if we * are being called from SPA context during pool initialization. */ int dsl_pool_sync_context(dsl_pool_t *dp) { return (curthread == dp->dp_tx.tx_sync_thread || spa_is_initializing(dp->dp_spa)); } uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree) { uint64_t space, resv; /* * If we're trying to assess whether it's OK to do a free, * cut the reservation in half to allow forward progress * (e.g. make it possible to rm(1) files from a full pool). */ space = spa_get_dspace(dp->dp_spa); resv = spa_get_slop_space(dp->dp_spa); if (netfree) resv >>= 1; return (space - resv); } boolean_t dsl_pool_need_dirty_delay(dsl_pool_t *dp) { uint64_t delay_min_bytes = zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100; boolean_t rv; mutex_enter(&dp->dp_lock); if (dp->dp_dirty_total > zfs_dirty_data_sync) txg_kick(dp); rv = (dp->dp_dirty_total > delay_min_bytes); mutex_exit(&dp->dp_lock); return (rv); } void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx) { if (space > 0) { mutex_enter(&dp->dp_lock); dp->dp_dirty_pertxg[tx->tx_txg & TXG_MASK] += space; dsl_pool_dirty_delta(dp, space); mutex_exit(&dp->dp_lock); } } void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg) { ASSERT3S(space, >=, 0); if (space == 0) return; mutex_enter(&dp->dp_lock); if (dp->dp_dirty_pertxg[txg & TXG_MASK] < space) { /* XXX writing something we didn't dirty? */ space = dp->dp_dirty_pertxg[txg & TXG_MASK]; } ASSERT3U(dp->dp_dirty_pertxg[txg & TXG_MASK], >=, space); dp->dp_dirty_pertxg[txg & TXG_MASK] -= space; ASSERT3U(dp->dp_dirty_total, >=, space); dsl_pool_dirty_delta(dp, -space); mutex_exit(&dp->dp_lock); } /* ARGSUSED */ static int upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg) { dmu_tx_t *tx = arg; dsl_dataset_t *ds, *prev = NULL; int err; err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds); if (err) return (err); while (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { err = dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); if (err) { dsl_dataset_rele(ds, FTAG); return (err); } if (dsl_dataset_phys(prev)->ds_next_snap_obj != ds->ds_object) break; dsl_dataset_rele(ds, FTAG); ds = prev; prev = NULL; } if (prev == NULL) { prev = dp->dp_origin_snap; /* * The $ORIGIN can't have any data, or the accounting * will be wrong. */ ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth); /* The origin doesn't get attached to itself */ if (ds->ds_object == prev->ds_object) { dsl_dataset_rele(ds, FTAG); return (0); } dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_prev_snap_obj = prev->ds_object; dsl_dataset_phys(ds)->ds_prev_snap_txg = dsl_dataset_phys(prev)->ds_creation_txg; dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); dsl_dir_phys(ds->ds_dir)->dd_origin_obj = prev->ds_object; dmu_buf_will_dirty(prev->ds_dbuf, tx); dsl_dataset_phys(prev)->ds_num_children++; if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) { ASSERT(ds->ds_prev == NULL); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev)); } } ASSERT3U(dsl_dir_phys(ds->ds_dir)->dd_origin_obj, ==, prev->ds_object); ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_obj, ==, prev->ds_object); if (dsl_dataset_phys(prev)->ds_next_clones_obj == 0) { dmu_buf_will_dirty(prev->ds_dbuf, tx); dsl_dataset_phys(prev)->ds_next_clones_obj = zap_create(dp->dp_meta_objset, DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(dp->dp_meta_objset, dsl_dataset_phys(prev)->ds_next_clones_obj, ds->ds_object, tx)); dsl_dataset_rele(ds, FTAG); if (prev != dp->dp_origin_snap) dsl_dataset_rele(prev, FTAG); return (0); } void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap != NULL); VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb, tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE)); } /* ARGSUSED */ static int upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { dmu_tx_t *tx = arg; objset_t *mos = dp->dp_meta_objset; if (dsl_dir_phys(ds->ds_dir)->dd_origin_obj != 0) { dsl_dataset_t *origin; VERIFY0(dsl_dataset_hold_obj(dp, dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &origin)); if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); dsl_dir_phys(origin->ds_dir)->dd_clones = zap_create(mos, DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); } VERIFY0(zap_add_int(dp->dp_meta_objset, dsl_dir_phys(origin->ds_dir)->dd_clones, ds->ds_object, tx)); dsl_dataset_rele(origin, FTAG); } return (0); } void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); uint64_t obj; (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* * We can't use bpobj_alloc(), because spa_version() still * returns the old version, and we need a new-version bpobj with * subobj support. So call dmu_object_alloc() directly. */ obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ, SPA_OLD_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx); VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx)); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN | DS_FIND_SERIALIZE)); } void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx) { uint64_t dsobj; dsl_dataset_t *ds; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dp->dp_origin_snap == NULL); ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER)); /* create the origin dir, ds, & snap-ds */ dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME, NULL, 0, kcred, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx); VERIFY0(dsl_dataset_hold_obj(dp, dsl_dataset_phys(ds)->ds_prev_snap_obj, dp, &dp->dp_origin_snap)); dsl_dataset_rele(ds, FTAG); } taskq_t * dsl_pool_vnrele_taskq(dsl_pool_t *dp) { return (dp->dp_vnrele_taskq); } /* * Walk through the pool-wide zap object of temporary snapshot user holds * and release them. */ void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp) { zap_attribute_t za; zap_cursor_t zc; objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; nvlist_t *holds; if (zapobj == 0) return; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); holds = fnvlist_alloc(); for (zap_cursor_init(&zc, mos, zapobj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { char *htag; nvlist_t *tags; htag = strchr(za.za_name, '-'); *htag = '\0'; ++htag; if (nvlist_lookup_nvlist(holds, za.za_name, &tags) != 0) { tags = fnvlist_alloc(); fnvlist_add_boolean(tags, htag); fnvlist_add_nvlist(holds, za.za_name, tags); fnvlist_free(tags); } else { fnvlist_add_boolean(tags, htag); } } dsl_dataset_user_release_tmp(dp, holds); fnvlist_free(holds); zap_cursor_fini(&zc); } /* * Create the pool-wide zap object for storing temporary snapshot holds. */ void dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx) { objset_t *mos = dp->dp_meta_objset; ASSERT(dp->dp_tmp_userrefs_obj == 0); ASSERT(dmu_tx_is_syncing(tx)); dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx); } static int dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj, const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding) { objset_t *mos = dp->dp_meta_objset; uint64_t zapobj = dp->dp_tmp_userrefs_obj; char *name; int error; ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); ASSERT(dmu_tx_is_syncing(tx)); /* * If the pool was created prior to SPA_VERSION_USERREFS, the * zap object for temporary holds might not exist yet. */ if (zapobj == 0) { if (holding) { dsl_pool_user_hold_create_obj(dp, tx); zapobj = dp->dp_tmp_userrefs_obj; } else { return (SET_ERROR(ENOENT)); } } name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag); if (holding) error = zap_add(mos, zapobj, name, 8, 1, &now, tx); else error = zap_remove(mos, zapobj, name, tx); strfree(name); return (error); } /* * Add a temporary hold for the given dataset object and tag. */ int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag, uint64_t now, dmu_tx_t *tx) { return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE)); } /* * Release a temporary hold for the given dataset object and tag. */ int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag, dmu_tx_t *tx) { return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 0, tx, B_FALSE)); } /* * DSL Pool Configuration Lock * * The dp_config_rwlock protects against changes to DSL state (e.g. dataset * creation / destruction / rename / property setting). It must be held for * read to hold a dataset or dsl_dir. I.e. you must call * dsl_pool_config_enter() or dsl_pool_hold() before calling * dsl_{dataset,dir}_hold{_obj}. In most circumstances, the dp_config_rwlock * must be held continuously until all datasets and dsl_dirs are released. * * The only exception to this rule is that if a "long hold" is placed on * a dataset, then the dp_config_rwlock may be dropped while the dataset * is still held. The long hold will prevent the dataset from being * destroyed -- the destroy will fail with EBUSY. A long hold can be * obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset * (by calling dsl_{dataset,objset}_{try}own{_obj}). * * Legitimate long-holders (including owners) should be long-running, cancelable * tasks that should cause "zfs destroy" to fail. This includes DMU * consumers (i.e. a ZPL filesystem being mounted or ZVOL being open), * "zfs send", and "zfs diff". There are several other long-holders whose * uses are suboptimal (e.g. "zfs promote", and zil_suspend()). * * The usual formula for long-holding would be: * dsl_pool_hold() * dsl_dataset_hold() * ... perform checks ... * dsl_dataset_long_hold() * dsl_pool_rele() * ... perform long-running task ... * dsl_dataset_long_rele() * dsl_dataset_rele() * * Note that when the long hold is released, the dataset is still held but * the pool is not held. The dataset may change arbitrarily during this time * (e.g. it could be destroyed). Therefore you shouldn't do anything to the * dataset except release it. * * User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only * or modifying operations. * * Modifying operations should generally use dsl_sync_task(). The synctask * infrastructure enforces proper locking strategy with respect to the * dp_config_rwlock. See the comment above dsl_sync_task() for details. * * Read-only operations will manually hold the pool, then the dataset, obtain * information from the dataset, then release the pool and dataset. * dmu_objset_{hold,rele}() are convenience routines that also do the pool * hold/rele. */ int dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp) { spa_t *spa; int error; error = spa_open(name, &spa, tag); if (error == 0) { *dp = spa_get_dsl(spa); dsl_pool_config_enter(*dp, tag); } return (error); } void dsl_pool_rele(dsl_pool_t *dp, void *tag) { dsl_pool_config_exit(dp, tag); spa_close(dp->dp_spa, tag); } void dsl_pool_config_enter(dsl_pool_t *dp, void *tag) { /* * We use a "reentrant" reader-writer lock, but not reentrantly. * * The rrwlock can (with the track_all flag) track all reading threads, * which is very useful for debugging which code path failed to release * the lock, and for verifying that the *current* thread does hold * the lock. * * (Unlike a rwlock, which knows that N threads hold it for * read, but not *which* threads, so rw_held(RW_READER) returns TRUE * if any thread holds it for read, even if this thread doesn't). */ ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); } void dsl_pool_config_enter_prio(dsl_pool_t *dp, void *tag) { ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER)); rrw_enter_read_prio(&dp->dp_config_rwlock, tag); } void dsl_pool_config_exit(dsl_pool_t *dp, void *tag) { rrw_exit(&dp->dp_config_rwlock, tag); } boolean_t dsl_pool_config_held(dsl_pool_t *dp) { return (RRW_LOCK_HELD(&dp->dp_config_rwlock)); } boolean_t dsl_pool_config_held_writer(dsl_pool_t *dp) { return (RRW_WRITE_HELD(&dp->dp_config_rwlock)); } Index: projects/vnet/sys/cddl/contrib/opensolaris =================================================================== --- projects/vnet/sys/cddl/contrib/opensolaris (revision 302276) +++ projects/vnet/sys/cddl/contrib/opensolaris (revision 302277) Property changes on: projects/vnet/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r302158-302276 Index: projects/vnet/sys/dev/cxgbe/t4_main.c =================================================================== --- projects/vnet/sys/dev/cxgbe/t4_main.c (revision 302276) +++ projects/vnet/sys/dev/cxgbe/t4_main.c (revision 302277) @@ -1,9565 +1,9565 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #if defined(__i386__) || defined(__amd64__) #include #include #endif #ifdef DDB #include #include #endif #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "t4_ioctl.h" #include "t4_l2t.h" #include "t4_mp_ring.h" /* T4 bus driver interface */ static int t4_probe(device_t); static int t4_attach(device_t); static int t4_detach(device_t); static device_method_t t4_methods[] = { DEVMETHOD(device_probe, t4_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD_END }; static driver_t t4_driver = { "t4nex", t4_methods, sizeof(struct adapter) }; /* T4 port (cxgbe) interface */ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); static device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), { 0, 0 } }; static driver_t cxgbe_driver = { "cxgbe", cxgbe_methods, sizeof(struct port_info) }; /* T4 VI (vcxgbe) interface */ static int vcxgbe_probe(device_t); static int vcxgbe_attach(device_t); static int vcxgbe_detach(device_t); static device_method_t vcxgbe_methods[] = { DEVMETHOD(device_probe, vcxgbe_probe), DEVMETHOD(device_attach, vcxgbe_attach), DEVMETHOD(device_detach, vcxgbe_detach), { 0, 0 } }; static driver_t vcxgbe_driver = { "vcxgbe", vcxgbe_methods, sizeof(struct vi_info) }; static d_ioctl_t t4_ioctl; static d_open_t t4_open; static d_close_t t4_close; static struct cdevsw t4_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = t4_open, .d_close = t4_close, .d_ioctl = t4_ioctl, .d_name = "t4nex", }; /* T5 bus driver interface */ static int t5_probe(device_t); static device_method_t t5_methods[] = { DEVMETHOD(device_probe, t5_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD_END }; static driver_t t5_driver = { "t5nex", t5_methods, sizeof(struct adapter) }; /* T5 port (cxl) interface */ static driver_t cxl_driver = { "cxl", cxgbe_methods, sizeof(struct port_info) }; /* T5 VI (vcxl) interface */ static driver_t vcxl_driver = { "vcxl", vcxgbe_methods, sizeof(struct vi_info) }; static struct cdevsw t5_cdevsw = { .d_version = D_VERSION, .d_flags = 0, .d_open = t4_open, .d_close = t4_close, .d_ioctl = t4_ioctl, .d_name = "t5nex", }; /* ifnet + media interface */ static void cxgbe_init(void *); static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); static int cxgbe_media_change(struct ifnet *); static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services"); /* * Correct lock order when you need to acquire multiple locks is t4_list_lock, * then ADAPTER_LOCK, then t4_uld_list_lock. */ static struct sx t4_list_lock; SLIST_HEAD(, adapter) t4_list; #ifdef TCP_OFFLOAD static struct sx t4_uld_list_lock; SLIST_HEAD(, uld_info) t4_uld_list; #endif /* * Tunables. See tweak_tunables() too. * * Each tunable is set to a default value here if it's known at compile-time. * Otherwise it is set to -1 as an indication to tweak_tunables() that it should * provide a reasonable default when the driver is loaded. * * Tunables applicable to both T4 and T5 are under hw.cxgbe. Those specific to * T5 are under hw.cxl. */ /* * Number of queues for tx and rx, 10G and 1G, NIC and offload. */ #define NTXQ_10G 16 static int t4_ntxq10g = -1; TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g); #define NRXQ_10G 8 static int t4_nrxq10g = -1; TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g); #define NTXQ_1G 4 static int t4_ntxq1g = -1; TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g); #define NRXQ_1G 2 static int t4_nrxq1g = -1; TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g); #define NTXQ_VI 1 static int t4_ntxq_vi = -1; TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi); #define NRXQ_VI 1 static int t4_nrxq_vi = -1; TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi); static int t4_rsrv_noflowq = 0; TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq); #ifdef TCP_OFFLOAD #define NOFLDTXQ_10G 8 static int t4_nofldtxq10g = -1; TUNABLE_INT("hw.cxgbe.nofldtxq10g", &t4_nofldtxq10g); #define NOFLDRXQ_10G 2 static int t4_nofldrxq10g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq10g", &t4_nofldrxq10g); #define NOFLDTXQ_1G 2 static int t4_nofldtxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldtxq1g", &t4_nofldtxq1g); #define NOFLDRXQ_1G 1 static int t4_nofldrxq1g = -1; TUNABLE_INT("hw.cxgbe.nofldrxq1g", &t4_nofldrxq1g); #define NOFLDTXQ_VI 1 static int t4_nofldtxq_vi = -1; TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi); #define NOFLDRXQ_VI 1 static int t4_nofldrxq_vi = -1; TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi); #endif #ifdef DEV_NETMAP #define NNMTXQ_VI 2 static int t4_nnmtxq_vi = -1; TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi); #define NNMRXQ_VI 2 static int t4_nnmrxq_vi = -1; TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi); #endif /* * Holdoff parameters for 10G and 1G ports. */ #define TMR_IDX_10G 1 static int t4_tmr_idx_10g = TMR_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g); #define PKTC_IDX_10G (-1) static int t4_pktc_idx_10g = PKTC_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g); #define TMR_IDX_1G 1 static int t4_tmr_idx_1g = TMR_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g); #define PKTC_IDX_1G (-1) static int t4_pktc_idx_1g = PKTC_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g); /* * Size (# of entries) of each tx and rx queue. */ static unsigned int t4_qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); static unsigned int t4_qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); /* * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). */ static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* * Configuration file. */ #define DEFAULT_CF "default" #define FLASH_CF "flash" #define UWIRE_CF "uwire" #define FPGA_CF "fpga" static char t4_cfg_file[32] = DEFAULT_CF; TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file)); /* * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively). * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them. * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water * mark or when signalled to do so, 0 to never emit PAUSE. */ static int t4_pause_settings = PAUSE_TX | PAUSE_RX; TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings); /* * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed, * encouraged respectively). */ static unsigned int t4_fw_install = 1; TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install); /* * ASIC features that will be used. Disable the ones you don't want so that the * chip resources aren't wasted on features that will not be used. */ static int t4_nbmcaps_allowed = 0; TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed); static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. by default */ TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed); static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS | FW_CAPS_CONFIG_SWITCH_EGRESS; TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed); static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC; TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed); static int t4_toecaps_allowed = -1; TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed); static int t4_rdmacaps_allowed = -1; TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed); static int t4_tlscaps_allowed = 0; TUNABLE_INT("hw.cxgbe.tlscaps_allowed", &t4_tlscaps_allowed); static int t4_iscsicaps_allowed = -1; TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); static int t4_fcoecaps_allowed = 0; TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); static int t5_write_combine = 0; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); static int t4_num_vis = 1; TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis); /* Functions used by extra VIs to obtain unique MAC addresses for each VI. */ static int vi_mac_funcs[] = { FW_VI_FUNC_OFLD, FW_VI_FUNC_IWARP, FW_VI_FUNC_OPENISCSI, FW_VI_FUNC_OPENFCOE, FW_VI_FUNC_FOISCSI, FW_VI_FUNC_FOFCOE, }; struct intrs_and_queues { uint16_t intr_type; /* INTx, MSI, or MSI-X */ uint16_t nirq; /* Total # of vectors */ uint16_t intr_flags_10g;/* Interrupt flags for each 10G port */ uint16_t intr_flags_1g; /* Interrupt flags for each 1G port */ uint16_t ntxq10g; /* # of NIC txq's for each 10G port */ uint16_t nrxq10g; /* # of NIC rxq's for each 10G port */ uint16_t ntxq1g; /* # of NIC txq's for each 1G port */ uint16_t nrxq1g; /* # of NIC rxq's for each 1G port */ uint16_t rsrv_noflowq; /* Flag whether to reserve queue 0 */ uint16_t nofldtxq10g; /* # of TOE txq's for each 10G port */ uint16_t nofldrxq10g; /* # of TOE rxq's for each 10G port */ uint16_t nofldtxq1g; /* # of TOE txq's for each 1G port */ uint16_t nofldrxq1g; /* # of TOE rxq's for each 1G port */ /* The vcxgbe/vcxl interfaces use these and not the ones above. */ uint16_t ntxq_vi; /* # of NIC txq's */ uint16_t nrxq_vi; /* # of NIC rxq's */ uint16_t nofldtxq_vi; /* # of TOE txq's */ uint16_t nofldrxq_vi; /* # of TOE rxq's */ uint16_t nnmtxq_vi; /* # of netmap txq's */ uint16_t nnmrxq_vi; /* # of netmap rxq's */ }; struct filter_entry { uint32_t valid:1; /* filter allocated and valid */ uint32_t locked:1; /* filter is administratively locked */ uint32_t pending:1; /* filter action is pending firmware reply */ uint32_t smtidx:8; /* Source MAC Table index for smac */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ struct t4_filter_specification fs; }; static int map_bars_0_and_4(struct adapter *); static int map_bar_2(struct adapter *); static void setup_memwin(struct adapter *); static void position_memwin(struct adapter *, int, uint32_t); static int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int); static inline int read_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int); static inline int write_via_memwin(struct adapter *, int, uint32_t, const uint32_t *, int); static int validate_mem_range(struct adapter *, uint32_t, int); static int fwmtype_to_hwmtype(int); static int validate_mt_off_len(struct adapter *, int, uint32_t, int, uint32_t *); static int fixup_devlog_params(struct adapter *); static int cfg_itype_and_nqueues(struct adapter *, int, int, int, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int partition_resources(struct adapter *, const struct firmware *, const char *); static int get_params__pre_init(struct adapter *); static int get_params__post_init(struct adapter *); static int set_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *, struct ifmedia *); static int cxgbe_init_synchronized(struct vi_info *); static int cxgbe_uninit_synchronized(struct vi_info *); static int setup_intr_handlers(struct adapter *); static void quiesce_txq(struct adapter *, struct sge_txq *); static void quiesce_wrq(struct adapter *, struct sge_wrq *); static void quiesce_iq(struct adapter *, struct sge_iq *); static void quiesce_fl(struct adapter *, struct sge_fl *); static int t4_alloc_irq(struct adapter *, struct irq *, int rid, driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); static int cpl_not_handled(struct sge_iq *, const struct rss_header *, struct mbuf *); static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *); static int fw_msg_not_handled(struct adapter *, const __be64 *); static void t4_sysctls(struct adapter *); static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); static int sysctl_btphy(SYSCTL_HANDLER_ARGS); static int sysctl_noflowq(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); static int sysctl_temperature(SYSCTL_HANDLER_ARGS); #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS); static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_devlog(SYSCTL_HANDLER_ARGS); static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS); static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS); static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS); static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS); static int sysctl_meminfo(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS); static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS); static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS); static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tids(SYSCTL_HANDLER_ARGS); static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la(SYSCTL_HANDLER_ARGS); static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS); static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tc_params(SYSCTL_HANDLER_ARGS); #endif #ifdef TCP_OFFLOAD static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS); static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS); #endif static uint32_t fconf_iconf_to_mode(uint32_t, uint32_t); static uint32_t mode_to_fconf(uint32_t); static uint32_t mode_to_iconf(uint32_t); static int check_fspec_against_fconf_iconf(struct adapter *, struct t4_filter_specification *); static int get_filter_mode(struct adapter *, uint32_t *); static int set_filter_mode(struct adapter *, uint32_t); static inline uint64_t get_filter_hits(struct adapter *, uint32_t); static int get_filter(struct adapter *, struct t4_filter *); static int set_filter(struct adapter *, struct t4_filter *); static int del_filter(struct adapter *, struct t4_filter *); static void clear_filter(struct filter_entry *); static int set_filter_wr(struct adapter *, int); static int del_filter_wr(struct adapter *, int); static int get_sge_context(struct adapter *, struct t4_sge_context *); static int load_fw(struct adapter *, struct t4_data *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); static int set_sched_class(struct adapter *, struct t4_sched_params *); static int set_sched_queue(struct adapter *, struct t4_sched_queue *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, int); #endif static int mod_event(module_t, int, void *); struct { uint16_t device; char *desc; } t4_pciids[] = { {0xa000, "Chelsio Terminator 4 FPGA"}, {0x4400, "Chelsio T440-dbg"}, {0x4401, "Chelsio T420-CR"}, {0x4402, "Chelsio T422-CR"}, {0x4403, "Chelsio T440-CR"}, {0x4404, "Chelsio T420-BCH"}, {0x4405, "Chelsio T440-BCH"}, {0x4406, "Chelsio T440-CH"}, {0x4407, "Chelsio T420-SO"}, {0x4408, "Chelsio T420-CX"}, {0x4409, "Chelsio T420-BT"}, {0x440a, "Chelsio T404-BT"}, {0x440e, "Chelsio T440-LP-CR"}, }, t5_pciids[] = { {0xb000, "Chelsio Terminator 5 FPGA"}, {0x5400, "Chelsio T580-dbg"}, {0x5401, "Chelsio T520-CR"}, /* 2 x 10G */ {0x5402, "Chelsio T522-CR"}, /* 2 x 10G, 2 X 1G */ {0x5403, "Chelsio T540-CR"}, /* 4 x 10G */ {0x5407, "Chelsio T520-SO"}, /* 2 x 10G, nomem */ {0x5409, "Chelsio T520-BT"}, /* 2 x 10GBaseT */ {0x540a, "Chelsio T504-BT"}, /* 4 x 1G */ {0x540d, "Chelsio T580-CR"}, /* 2 x 40G */ {0x540e, "Chelsio T540-LP-CR"}, /* 4 x 10G */ {0x5410, "Chelsio T580-LP-CR"}, /* 2 x 40G */ {0x5411, "Chelsio T520-LL-CR"}, /* 2 x 10G */ {0x5412, "Chelsio T560-CR"}, /* 1 x 40G, 2 x 10G */ {0x5414, "Chelsio T580-LP-SO-CR"}, /* 2 x 40G, nomem */ {0x5415, "Chelsio T502-BT"}, /* 2 x 1G */ #ifdef notyet {0x5404, "Chelsio T520-BCH"}, {0x5405, "Chelsio T540-BCH"}, {0x5406, "Chelsio T540-CH"}, {0x5408, "Chelsio T520-CX"}, {0x540b, "Chelsio B520-SR"}, {0x540c, "Chelsio B504-BT"}, {0x540f, "Chelsio Amsterdam"}, {0x5413, "Chelsio T580-CHR"}, #endif }; #ifdef TCP_OFFLOAD /* * service_iq() has an iq and needs the fl. Offset of fl from the iq should be * exactly the same for both rxq and ofld_rxq. */ CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq)); CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); #endif /* No easy way to include t4_msg.h before adapter.h so we check this way */ CTASSERT(nitems(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS); CTASSERT(nitems(((struct adapter *)0)->fw_msg_handler) == NUM_FW6_TYPES); CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE); static int t4_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xa000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t4_pciids); i++) { if (d == t4_pciids[i].device) { device_set_desc(dev, t4_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t5_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xb000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t5_pciids); i++) { if (d == t5_pciids[i].device) { device_set_desc(dev, t5_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static void t5_attribute_workaround(device_t dev) { device_t root_port; uint32_t v; /* * The T5 chips do not properly echo the No Snoop and Relaxed * Ordering attributes when replying to a TLP from a Root * Port. As a workaround, find the parent Root Port and * disable No Snoop and Relaxed Ordering. Note that this * affects all devices under this root port. */ root_port = pci_find_pcie_root_port(dev); if (root_port == NULL) { device_printf(dev, "Unable to find parent root port\n"); return; } v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL, PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2); if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) != 0) device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n", device_get_nameunit(root_port)); } static int t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, j, n10g, n1g, rqidx, tqidx; struct intrs_and_queues iaq; struct sge *s; uint8_t *buf; #ifdef TCP_OFFLOAD int ofld_rqidx, ofld_tqidx; #endif #ifdef DEV_NETMAP int nm_rqidx, nm_tqidx; #endif int num_vis; sc = device_get_softc(dev); sc->dev = dev; TUNABLE_INT_FETCH("hw.cxgbe.debug_flags", &sc->debug_flags); if ((pci_get_device(dev) & 0xff00) == 0x5400) t5_attribute_workaround(dev); pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { uint32_t v; pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2); v |= PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5); } sc->traceq = -1; mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF); snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer", device_get_nameunit(dev)); snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); sx_xlock(&t4_list_lock); SLIST_INSERT_HEAD(&t4_list, sc, link); sx_xunlock(&t4_list_lock); mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0); mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF); rc = map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ /* * This is the real PF# to which we're attaching. Works from within PCI * passthrough environments too, where pci_get_function() could return a * different PF# depending on the passthrough configuration. We need to * use the real PF# in all our communication with the firmware. */ sc->pf = G_SOURCEPF(t4_read_reg(sc, A_PL_WHOAMI)); sc->mbox = sc->pf; memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); sc->an_handler = an_not_handled; for (i = 0; i < nitems(sc->cpl_handler); i++) sc->cpl_handler[i] = cpl_not_handled; for (i = 0; i < nitems(sc->fw_msg_handler); i++) sc->fw_msg_handler[i] = fw_msg_not_handled; t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl); t4_register_cpl_handler(sc, CPL_TRACE_PKT, t4_trace_pkt); t4_register_cpl_handler(sc, CPL_T5_TRACE_PKT, t5_trace_pkt); t4_init_sge_cpl_handlers(sc); /* Prepare the adapter for operation. */ buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_prep_adapter(sc, buf); free(buf, M_CXGBE); if (rc != 0) { device_printf(dev, "failed to prepare adapter: %d.\n", rc); goto done; } /* * Do this really early, with the memory windows set up even before the * character device. The userland tool's register i/o and mem read * will work even in "recovery mode". */ setup_memwin(sc); if (t4_init_devlog_params(sc, 0) == 0) fixup_devlog_params(sc); sc->cdev = make_dev(is_t4(sc) ? &t4_cdevsw : &t5_cdevsw, device_get_unit(dev), UID_ROOT, GID_WHEEL, 0600, "%s", device_get_nameunit(dev)); if (sc->cdev == NULL) device_printf(dev, "failed to create nexus char device.\n"); else sc->cdev->si_drv1 = sc; /* Go no further if recovery mode has been requested. */ if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) { device_printf(dev, "recovery mode.\n"); goto done; } #if defined(__i386__) if ((cpu_feature & CPUID_CX8) == 0) { device_printf(dev, "64 bit atomics not available.\n"); rc = ENOTSUP; goto done; } #endif /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ rc = get_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = set_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = map_bar_2(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_create_dma_tag(sc); if (rc != 0) goto done; /* error message displayed already */ /* * Number of VIs to create per-port. The first VI is the "main" regular * VI for the port. The rest are additional virtual interfaces on the * same physical port. Note that the main VI does not have native * netmap support but the extra VIs do. * * Limit the number of VIs per port to the number of available * MAC addresses per port. */ if (t4_num_vis >= 1) num_vis = t4_num_vis; else num_vis = 1; if (num_vis > nitems(vi_mac_funcs)) { num_vis = nitems(vi_mac_funcs); device_printf(dev, "Number of VIs limited to %d\n", num_vis); } /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. We also figure * out whether a port is 10G or 1G and use that information when * calculating how many interrupts to attempt to allocate. */ n10g = n1g = 0; for_each_port(sc, i) { struct port_info *pi; pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); sc->port[i] = pi; /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; /* * XXX: vi[0] is special so we can't delay this allocation until * pi->nvi's final value is known. */ pi->vi = malloc(sizeof(struct vi_info) * num_vis, M_CXGBE, M_ZERO | M_WAITOK); /* * Allocate the "main" VI and initialize parameters * like mac addr. */ rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } pi->link_cfg.requested_fc &= ~(PAUSE_TX | PAUSE_RX); pi->link_cfg.requested_fc |= t4_pause_settings; pi->link_cfg.fc &= ~(PAUSE_TX | PAUSE_RX); pi->link_cfg.fc |= t4_pause_settings; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, &pi->link_cfg); if (rc != 0) { device_printf(dev, "port %d l1cfg failed: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", device_get_nameunit(dev), i); mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); sc->chan_map[pi->tx_chan] = i; pi->tc = malloc(sizeof(struct tx_sched_class) * sc->chip_params->nsched_cls, M_CXGBE, M_ZERO | M_WAITOK); if (is_10G_port(pi) || is_40G_port(pi)) { n10g++; } else { n1g++; } pi->linkdnrc = -1; pi->dev = device_add_child(dev, is_t4(sc) ? "cxgbe" : "cxl", -1); if (pi->dev == NULL) { device_printf(dev, "failed to add device for port %d.\n", i); rc = ENXIO; goto done; } pi->vi[0].dev = pi->dev; device_set_softc(pi->dev, pi); } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. */ rc = cfg_itype_and_nqueues(sc, n10g, n1g, num_vis, &iaq); if (rc != 0) goto done; /* error message displayed already */ if (iaq.nrxq_vi + iaq.nofldrxq_vi + iaq.nnmrxq_vi == 0) num_vis = 1; sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; s = &sc->sge; s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g; s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g; if (num_vis > 1) { s->nrxq += (n10g + n1g) * (num_vis - 1) * iaq.nrxq_vi; s->ntxq += (n10g + n1g) * (num_vis - 1) * iaq.ntxq_vi; } s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ #ifdef TCP_OFFLOAD if (is_offload(sc)) { s->nofldrxq = n10g * iaq.nofldrxq10g + n1g * iaq.nofldrxq1g; s->nofldtxq = n10g * iaq.nofldtxq10g + n1g * iaq.nofldtxq1g; if (num_vis > 1) { s->nofldrxq += (n10g + n1g) * (num_vis - 1) * iaq.nofldrxq_vi; s->nofldtxq += (n10g + n1g) * (num_vis - 1) * iaq.nofldtxq_vi; } s->neq += s->nofldtxq + s->nofldrxq; s->niq += s->nofldrxq; s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); } #endif #ifdef DEV_NETMAP if (num_vis > 1) { s->nnmrxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmrxq_vi; s->nnmtxq = (n10g + n1g) * (num_vis - 1) * iaq.nnmtxq_vi; } s->neq += s->nnmtxq + s->nnmrxq; s->niq += s->nnmrxq; s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq), M_CXGBE, M_ZERO | M_WAITOK); #endif s->ctrlq = malloc(sc->params.nports * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, M_ZERO | M_WAITOK); s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, M_ZERO | M_WAITOK); s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, M_ZERO | M_WAITOK); sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); t4_init_l2t(sc, M_WAITOK); /* * Second pass over the ports. This time we know the number of rx and * tx queues that each port should get. */ rqidx = tqidx = 0; #ifdef TCP_OFFLOAD ofld_rqidx = ofld_tqidx = 0; #endif #ifdef DEV_NETMAP nm_rqidx = nm_tqidx = 0; #endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; struct vi_info *vi; if (pi == NULL) continue; pi->nvi = num_vis; for_each_vi(pi, j, vi) { vi->pi = pi; vi->qsize_rxq = t4_qsize_rxq; vi->qsize_txq = t4_qsize_txq; vi->first_rxq = rqidx; vi->first_txq = tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { vi->tmr_idx = t4_tmr_idx_10g; vi->pktc_idx = t4_pktc_idx_10g; vi->flags |= iaq.intr_flags_10g & INTR_RXQ; vi->nrxq = j == 0 ? iaq.nrxq10g : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq10g : iaq.ntxq_vi; } else { vi->tmr_idx = t4_tmr_idx_1g; vi->pktc_idx = t4_pktc_idx_1g; vi->flags |= iaq.intr_flags_1g & INTR_RXQ; vi->nrxq = j == 0 ? iaq.nrxq1g : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq1g : iaq.ntxq_vi; } rqidx += vi->nrxq; tqidx += vi->ntxq; if (j == 0 && vi->ntxq > 1) vi->rsrv_noflowq = iaq.rsrv_noflowq ? 1 : 0; else vi->rsrv_noflowq = 0; #ifdef TCP_OFFLOAD vi->first_ofld_rxq = ofld_rqidx; vi->first_ofld_txq = ofld_tqidx; if (is_10G_port(pi) || is_40G_port(pi)) { vi->flags |= iaq.intr_flags_10g & INTR_OFLD_RXQ; vi->nofldrxq = j == 0 ? iaq.nofldrxq10g : iaq.nofldrxq_vi; vi->nofldtxq = j == 0 ? iaq.nofldtxq10g : iaq.nofldtxq_vi; } else { vi->flags |= iaq.intr_flags_1g & INTR_OFLD_RXQ; vi->nofldrxq = j == 0 ? iaq.nofldrxq1g : iaq.nofldrxq_vi; vi->nofldtxq = j == 0 ? iaq.nofldtxq1g : iaq.nofldtxq_vi; } ofld_rqidx += vi->nofldrxq; ofld_tqidx += vi->nofldtxq; #endif #ifdef DEV_NETMAP if (j > 0) { vi->first_nm_rxq = nm_rqidx; vi->first_nm_txq = nm_tqidx; vi->nnmrxq = iaq.nnmrxq_vi; vi->nnmtxq = iaq.nnmtxq_vi; nm_rqidx += vi->nnmrxq; nm_tqidx += vi->nnmtxq; } #endif } } rc = setup_intr_handlers(sc); if (rc != 0) { device_printf(dev, "failed to setup interrupt handlers: %d\n", rc); goto done; } rc = bus_generic_attach(dev); if (rc != 0) { device_printf(dev, "failed to attach all child ports: %d\n", rc); goto done; } device_printf(dev, "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n", sc->params.pci.speed, sc->params.pci.width, sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq); t4_set_desc(sc); done: if (rc != 0 && sc->cdev) { /* cdev was created and so cxgbetool works; recover that way. */ device_printf(dev, "error during attach, adapter is now in recovery mode.\n"); rc = 0; } if (rc != 0) t4_detach(dev); else t4_sysctls(sc); return (rc); } /* * Idempotent */ static int t4_detach(device_t dev) { struct adapter *sc; struct port_info *pi; int i, rc; sc = device_get_softc(dev); if (sc->flags & FULL_INIT_DONE) t4_intr_disable(sc); if (sc->cdev) { destroy_dev(sc->cdev); sc->cdev = NULL; } rc = bus_generic_detach(dev); if (rc) { device_printf(dev, "failed to detach child devices: %d\n", rc); return (rc); } for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); free(pi->vi, M_CXGBE); free(pi->tc, M_CXGBE); free(pi, M_CXGBE); } } if (sc->flags & FULL_INIT_DONE) adapter_full_uninit(sc); if (sc->flags & FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) pci_release_msi(dev); if (sc->regs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); if (sc->udbs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->msix_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); if (sc->l2t) t4_free_l2t(sc->l2t); #ifdef TCP_OFFLOAD free(sc->sge.ofld_rxq, M_CXGBE); free(sc->sge.ofld_txq, M_CXGBE); #endif #ifdef DEV_NETMAP free(sc->sge.nm_rxq, M_CXGBE); free(sc->sge.nm_txq, M_CXGBE); #endif free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); t4_destroy_dma_tag(sc); if (mtx_initialized(&sc->sc_lock)) { sx_xlock(&t4_list_lock); SLIST_REMOVE(&t4_list, sc, adapter, link); sx_xunlock(&t4_list_lock); mtx_destroy(&sc->sc_lock); } callout_drain(&sc->sfl_callout); if (mtx_initialized(&sc->tids.ftid_lock)) mtx_destroy(&sc->tids.ftid_lock); if (mtx_initialized(&sc->sfl_lock)) mtx_destroy(&sc->sfl_lock); if (mtx_initialized(&sc->ifp_lock)) mtx_destroy(&sc->ifp_lock); if (mtx_initialized(&sc->reg_lock)) mtx_destroy(&sc->reg_lock); for (i = 0; i < NUM_MEMWIN; i++) { struct memwin *mw = &sc->memwin[i]; if (rw_initialized(&mw->mw_lock)) rw_destroy(&mw->mw_lock); } bzero(sc, sizeof(*sc)); return (0); } static int cxgbe_probe(device_t dev) { char buf[128]; struct port_info *pi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d", pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS) #define T4_CAP_ENABLE (T4_CAP) static int cxgbe_vi_attach(device_t dev, struct vi_info *vi) { struct ifnet *ifp; struct sbuf *sb; vi->xact_addr_filt = -1; callout_init(&vi->tick, 1); /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } vi->ifp = ifp; ifp->if_softc = vi; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = cxgbe_init; ifp->if_ioctl = cxgbe_ioctl; ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_get_counter = cxgbe_get_counter; ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) ifp->if_capabilities |= IFCAP_TOE; #endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS; ifp->if_hw_tsomaxsegsize = 65536; /* Initialize ifmedia for this VI */ ifmedia_init(&vi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); build_medialist(vi->pi, &vi->media); vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, EVENTHANDLER_PRI_ANY); ether_ifattach(ifp, vi->hw_addr); #ifdef DEV_NETMAP if (vi->nnmrxq != 0) cxgbe_nm_attach(vi); #endif sb = sbuf_new_auto(); sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq); #ifdef TCP_OFFLOAD if (ifp->if_capabilities & IFCAP_TOE) sbuf_printf(sb, "; %d txq, %d rxq (TOE)", vi->nofldtxq, vi->nofldrxq); #endif #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) sbuf_printf(sb, "; %d txq, %d rxq (netmap)", vi->nnmtxq, vi->nnmrxq); #endif sbuf_finish(sb); device_printf(dev, "%s\n", sbuf_data(sb)); sbuf_delete(sb); vi_sysctls(vi); return (0); } static int cxgbe_attach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct vi_info *vi; int i, rc; callout_init_mtx(&pi->tick, &pi->pi_lock, 0); rc = cxgbe_vi_attach(dev, &pi->vi[0]); if (rc) return (rc); for_each_vi(pi, i, vi) { if (i == 0) continue; vi->dev = device_add_child(dev, is_t4(pi->adapter) ? "vcxgbe" : "vcxl", -1); if (vi->dev == NULL) { device_printf(dev, "failed to add VI %d\n", i); continue; } device_set_softc(vi->dev, vi); } cxgbe_sysctls(pi); bus_generic_attach(dev); return (0); } static void cxgbe_vi_detach(struct vi_info *vi) { struct ifnet *ifp = vi->ifp; ether_ifdetach(ifp); if (vi->vlan_c) EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c); /* Let detach proceed even if these fail. */ #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_detach(vi); #endif cxgbe_uninit_synchronized(vi); callout_drain(&vi->tick); vi_full_uninit(vi); ifmedia_removeall(&vi->media); if_free(vi->ifp); vi->ifp = NULL; } static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; int rc; /* Detach the extra VIs first. */ rc = bus_generic_detach(dev); if (rc) return (rc); device_delete_children(dev); doom_vi(sc, &pi->vi[0]); if (pi->flags & HAS_TRACEQ) { sc->traceq = -1; /* cloner should not create ifnet */ t4_tracer_port_detach(sc); } cxgbe_vi_detach(&pi->vi[0]); callout_drain(&pi->tick); end_synchronized_op(sc, 0); return (0); } static void cxgbe_init(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0) return; cxgbe_init_synchronized(vi); end_synchronized_op(sc, 0); } static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags, can_sleep; struct vi_info *vi = ifp->if_softc; struct adapter *sc = vi->pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; switch (cmd) { case SIOCSIFMTU: mtu = ifr->ifr_mtu; if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) return (EINVAL); rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu"); if (rc) return (rc); ifp->if_mtu = mtu; if (vi->flags & VI_INIT_DONE) { t4_update_fl_bufsize(ifp); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MTU); } end_synchronized_op(sc, 0); break; case SIOCSIFFLAGS: can_sleep = 0; redo_sifflags: rc = begin_synchronized_op(sc, vi, can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg"); if (rc) return (rc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = vi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (can_sleep == 1) { end_synchronized_op(sc, 0); can_sleep = 0; goto redo_sifflags; } rc = update_mac_settings(ifp, XGMAC_PROMISC | XGMAC_ALLMULTI); } } else { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_init_synchronized(vi); } vi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_uninit_synchronized(vi); } end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD); break; case SIOCADDMULTI: case SIOCDELMULTI: /* these two are called with a mutex held :-( */ rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi"); if (rc) return (rc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MCADDRS); end_synchronized_op(sc, LOCK_HELD); break; case SIOCSIFCAP: rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) return (rc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. */ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { #if defined(INET) || defined(INET6) int i; struct sge_rxq *rxq; ifp->if_capenable ^= IFCAP_LRO; for_each_rxq(vi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; else rxq->iq.flags &= ~IQ_LRO_ENABLED; } #endif } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE; rc = toe_capability(vi, enable); if (rc != 0) goto fail; ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_VLANEX); } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; /* Need to find out how to disable auto-mtu-inflation */ } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif fail: end_synchronized_op(sc, 0); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: ifmedia_ioctl(ifp, ifr, &vi->media, cmd); break; case SIOCGI2C: { struct ifi2creq i2c; rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); if (rc != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { rc = EPERM; break; } if (i2c.len > sizeof(i2c.data)) { rc = EINVAL; break; } rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, vi->pi->port_id, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); end_synchronized_op(sc, 0); if (rc == 0) rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); break; } default: rc = ether_ioctl(ifp, cmd, data); } return (rc); } static int cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_txq *txq; void *items[1]; int rc; M_ASSERTPKTHDR(m); MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */ if (__predict_false(pi->link_cfg.link_ok == 0)) { m_freem(m); return (ENETDOWN); } rc = parse_pkt(&m); if (__predict_false(rc != 0)) { MPASS(m == NULL); /* was freed already */ atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */ return (rc); } /* Select a txq. */ txq = &sc->sge.txq[vi->first_txq]; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq); items[0] = m; rc = mp_ring_enqueue(txq->r, items, 1, 4096); if (__predict_false(rc != 0)) m_freem(m); return (rc); } static void cxgbe_qflush(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct sge_txq *txq; int i; /* queues do not exist if !VI_INIT_DONE. */ if (vi->flags & VI_INIT_DONE) { for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("qflush", 1); } } } if_qflush(ifp); } static uint64_t vi_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct fw_vi_stats_vf *s = &vi->stats; vi_refresh_stats(vi->pi->adapter, vi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_bcast_frames + s->rx_mcast_frames + s->rx_ucast_frames); case IFCOUNTER_IERRORS: return (s->rx_err_frames); case IFCOUNTER_OPACKETS: return (s->tx_bcast_frames + s->tx_mcast_frames + s->tx_ucast_frames + s->tx_offload_frames); case IFCOUNTER_OERRORS: return (s->tx_drop_frames); case IFCOUNTER_IBYTES: return (s->rx_bcast_bytes + s->rx_mcast_bytes + s->rx_ucast_bytes); case IFCOUNTER_OBYTES: return (s->tx_bcast_bytes + s->tx_mcast_bytes + s->tx_ucast_bytes + s->tx_offload_bytes); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = 0; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } uint64_t cxgbe_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct port_stats *s = &pi->stats; if (pi->nvi > 1) return (vi_get_counter(ifp, c)); cxgbe_refresh_stats(sc, pi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_frames); case IFCOUNTER_IERRORS: return (s->rx_jabber + s->rx_runt + s->rx_too_long + s->rx_fcs_err + s->rx_len_err); case IFCOUNTER_OPACKETS: return (s->tx_frames); case IFCOUNTER_OERRORS: return (s->tx_error_frames); case IFCOUNTER_IBYTES: return (s->rx_octets); case IFCOUNTER_OBYTES: return (s->tx_octets); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_IQDROPS: return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 + s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 + s->rx_trunc3 + pi->tnl_cong_drops); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = s->tx_drop; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } static int cxgbe_media_change(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; device_printf(vi->dev, "%s unimplemented.\n", __func__); return (EOPNOTSUPP); } static void cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct ifmedia_entry *cur; int speed = pi->link_cfg.speed; cur = vi->media.ifm_cur; ifmr->ifm_status = IFM_AVALID; if (!pi->link_cfg.link_ok) return; ifmr->ifm_status |= IFM_ACTIVE; /* active and current will differ iff current media is autoselect. */ if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (speed == 10000) ifmr->ifm_active |= IFM_10G_T; else if (speed == 1000) ifmr->ifm_active |= IFM_1000_T; else if (speed == 100) ifmr->ifm_active |= IFM_100_TX; else if (speed == 10) ifmr->ifm_active |= IFM_10_T; else KASSERT(0, ("%s: link up but speed unknown (%u)", __func__, speed)); } static int vcxgbe_probe(device_t dev) { char buf[128]; struct vi_info *vi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id, vi - vi->pi->vi); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } static int vcxgbe_attach(device_t dev) { struct vi_info *vi; struct port_info *pi; struct adapter *sc; int func, index, rc; u32 param, val; vi = device_get_softc(dev); pi = vi->pi; sc = pi->adapter; index = vi - pi->vi; KASSERT(index < nitems(vi_mac_funcs), ("%s: VI %s doesn't have a MAC func", __func__, device_get_nameunit(dev))); func = vi_mac_funcs[index]; rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, vi->hw_addr, &vi->rss_size, func, 0); if (rc < 0) { device_printf(dev, "Failed to allocate virtual interface " "for port %d: %d\n", pi->port_id, -rc); return (-rc); } vi->viid = rc; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | V_FW_PARAMS_PARAM_YZ(vi->viid); rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc) vi->rss_base = 0xffff; else { /* MPASS((val >> 16) == rss_size); */ vi->rss_base = val & 0xffff; } rc = cxgbe_vi_attach(dev, vi); if (rc) { t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); return (rc); } return (0); } static int vcxgbe_detach(device_t dev) { struct vi_info *vi; struct adapter *sc; vi = device_get_softc(dev); sc = vi->pi->adapter; doom_vi(sc, vi); cxgbe_vi_detach(vi); t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); end_synchronized_op(sc, 0); return (0); } void t4_fatal_err(struct adapter *sc) { t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); t4_intr_disable(sc); log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); } static int map_bars_0_and_4(struct adapter *sc) { sc->regs_rid = PCIR_BAR(0); sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE); if (sc->regs_res == NULL) { device_printf(sc->dev, "cannot map registers.\n"); return (ENXIO); } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); setbit(&sc->doorbells, DOORBELL_KDB); sc->msix_rid = PCIR_BAR(4); sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->msix_rid, RF_ACTIVE); if (sc->msix_res == NULL) { device_printf(sc->dev, "cannot map MSI-X BAR.\n"); return (ENXIO); } return (0); } static int map_bar_2(struct adapter *sc) { /* * T4: only iWARP driver uses the userspace doorbells. There is no need * to map it if RDMA is disabled. */ if (is_t4(sc) && sc->rdmacaps == 0) return (0); sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE); if (sc->udbs_res == NULL) { device_printf(sc->dev, "cannot map doorbell BAR.\n"); return (ENXIO); } sc->udbs_base = rman_get_virtual(sc->udbs_res); if (is_t5(sc)) { setbit(&sc->doorbells, DOORBELL_UDB); #if defined(__i386__) || defined(__amd64__) if (t5_write_combine) { int rc; /* * Enable write combining on BAR2. This is the * userspace doorbell BAR and is split into 128B * (UDBS_SEG_SIZE) doorbell regions, each associated * with an egress queue. The first 64B has the doorbell * and the second 64B can be used to submit a tx work * request with an implicit doorbell. */ rc = pmap_change_attr((vm_offset_t)sc->udbs_base, rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING); if (rc == 0) { clrbit(&sc->doorbells, DOORBELL_UDB); setbit(&sc->doorbells, DOORBELL_WCWR); setbit(&sc->doorbells, DOORBELL_UDBWC); } else { device_printf(sc->dev, "couldn't enable write combining: %d\n", rc); } t4_write_reg(sc, A_SGE_STAT_CFG, V_STATSOURCE_T5(7) | V_STATMODE(0)); } #endif } return (0); } struct memwin_init { uint32_t base; uint32_t aperture; }; static const struct memwin_init t4_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 } }; static const struct memwin_init t5_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 }, }; static void setup_memwin(struct adapter *sc) { const struct memwin_init *mw_init; struct memwin *mw; int i; uint32_t bar0; if (is_t4(sc)) { /* * Read low 32b of bar0 indirectly via the hardware backdoor * mechanism. Works from within PCI passthrough environments * too, where rman_get_start() can return a different value. We * need to program the T4 memory window decoders with the actual * addresses that will be coming across the PCIe link. */ bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0)); bar0 &= (uint32_t) PCIM_BAR_MEM_BASE; mw_init = &t4_memwin[0]; } else { /* T5+ use the relative offset inside the PCIe BAR */ bar0 = 0; mw_init = &t5_memwin[0]; } for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) { rw_init(&mw->mw_lock, "memory window access"); mw->mw_base = mw_init->base; mw->mw_aperture = mw_init->aperture; mw->mw_curpos = 0; t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i), (mw->mw_base + bar0) | V_BIR(0) | V_WINDOW(ilog2(mw->mw_aperture) - 10)); rw_wlock(&mw->mw_lock); position_memwin(sc, i, 0); rw_wunlock(&mw->mw_lock); } /* flush */ t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2)); } /* * Positions the memory window at the given address in the card's address space. * There are some alignment requirements and the actual position may be at an * address prior to the requested address. mw->mw_curpos always has the actual * position of the window. */ static void position_memwin(struct adapter *sc, int idx, uint32_t addr) { struct memwin *mw; uint32_t pf; uint32_t reg; MPASS(idx >= 0 && idx < NUM_MEMWIN); mw = &sc->memwin[idx]; rw_assert(&mw->mw_lock, RA_WLOCKED); if (is_t4(sc)) { pf = 0; mw->mw_curpos = addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); mw->mw_curpos = addr & ~0x7f; /* start must be 128B aligned */ } reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx); t4_write_reg(sc, reg, mw->mw_curpos | pf); t4_read_reg(sc, reg); /* flush */ } static int rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len, int rw) { struct memwin *mw; uint32_t mw_end, v; MPASS(idx >= 0 && idx < NUM_MEMWIN); /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); mw = &sc->memwin[idx]; while (len > 0) { rw_rlock(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; if (addr >= mw_end || addr < mw->mw_curpos) { /* Will need to reposition the window */ if (!rw_try_upgrade(&mw->mw_lock)) { rw_runlock(&mw->mw_lock); rw_wlock(&mw->mw_lock); } rw_assert(&mw->mw_lock, RA_WLOCKED); position_memwin(sc, idx, addr); rw_downgrade(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; } rw_assert(&mw->mw_lock, RA_RLOCKED); while (addr < mw_end && len > 0) { if (rw == 0) { v = t4_read_reg(sc, mw->mw_base + addr - mw->mw_curpos); *val++ = le32toh(v); } else { v = *val++; t4_write_reg(sc, mw->mw_base + addr - mw->mw_curpos, htole32(v)); } addr += 4; len -= 4; } rw_runlock(&mw->mw_lock); } return (0); } static inline int read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, val, len, 0)); } static inline int write_via_memwin(struct adapter *sc, int idx, uint32_t addr, const uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, (void *)(uintptr_t)val, len, 1)); } static int t4_range_cmp(const void *a, const void *b) { return ((const struct t4_range *)a)->start - ((const struct t4_range *)b)->start; } /* * Verify that the memory range specified by the addr/len pair is valid within * the card's address space. */ static int validate_mem_range(struct adapter *sc, uint32_t addr, int len) { struct t4_range mem_ranges[4], *r, *next; uint32_t em, addr_len; int i, n, remaining; /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); /* Enabled memories */ em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); r = &mem_ranges[0]; n = 0; bzero(r, sizeof(mem_ranges)); if (em & F_EDRAM0_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); r->size = G_EDRAM0_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM0_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EDRAM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); r->size = G_EDRAM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EXT_MEM_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); r->size = G_EXT_MEM_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); r->size = G_EXT_MEM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } MPASS(n <= nitems(mem_ranges)); if (n > 1) { /* Sort and merge the ranges. */ qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp); /* Start from index 0 and examine the next n - 1 entries. */ r = &mem_ranges[0]; for (remaining = n - 1; remaining > 0; remaining--, r++) { MPASS(r->size > 0); /* r is a valid entry. */ next = r + 1; MPASS(next->size > 0); /* and so is the next one. */ while (r->start + r->size >= next->start) { /* Merge the next one into the current entry. */ r->size = max(r->start + r->size, next->start + next->size) - r->start; n--; /* One fewer entry in total. */ if (--remaining == 0) goto done; /* short circuit */ next++; } if (next != r + 1) { /* * Some entries were merged into r and next * points to the first valid entry that couldn't * be merged. */ MPASS(next->size > 0); /* must be valid */ memcpy(r + 1, next, remaining * sizeof(*r)); #ifdef INVARIANTS /* * This so that the foo->size assertion in the * next iteration of the loop do the right * thing for entries that were pulled up and are * no longer valid. */ MPASS(n < nitems(mem_ranges)); bzero(&mem_ranges[n], (nitems(mem_ranges) - n) * sizeof(struct t4_range)); #endif } } done: /* Done merging the ranges. */ MPASS(n > 0); r = &mem_ranges[0]; for (i = 0; i < n; i++, r++) { if (addr >= r->start && addr + len <= r->start + r->size) return (0); } } return (EFAULT); } static int fwmtype_to_hwmtype(int mtype) { switch (mtype) { case FW_MEMTYPE_EDC0: return (MEM_EDC0); case FW_MEMTYPE_EDC1: return (MEM_EDC1); case FW_MEMTYPE_EXTMEM: return (MEM_MC0); case FW_MEMTYPE_EXTMEM1: return (MEM_MC1); default: panic("%s: cannot translate fw mtype %d.", __func__, mtype); } } /* * Verify that the memory range specified by the memtype/offset/len pair is * valid and lies entirely within the memtype specified. The global address of * the start of the range is returned in addr. */ static int validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len, uint32_t *addr) { uint32_t em, addr_len, maddr; /* Memory can only be accessed in naturally aligned 4 byte units */ if (off & 3 || len & 3 || len == 0) return (EINVAL); em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); switch (fwmtype_to_hwmtype(mtype)) { case MEM_EDC0: if (!(em & F_EDRAM0_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); maddr = G_EDRAM0_BASE(addr_len) << 20; break; case MEM_EDC1: if (!(em & F_EDRAM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); maddr = G_EDRAM1_BASE(addr_len) << 20; break; case MEM_MC: if (!(em & F_EXT_MEM_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); maddr = G_EXT_MEM_BASE(addr_len) << 20; break; case MEM_MC1: if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); maddr = G_EXT_MEM1_BASE(addr_len) << 20; break; default: return (EINVAL); } *addr = maddr + off; /* global address */ return (validate_mem_range(sc, *addr, len)); } static int fixup_devlog_params(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; int rc; rc = validate_mt_off_len(sc, dparams->memtype, dparams->start, dparams->size, &dparams->addr); return (rc); } static int cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, int num_vis, struct intrs_and_queues *iaq) { int rc, itype, navail, nrxq10g, nrxq1g, n; int nofldrxq10g = 0, nofldrxq1g = 0; bzero(iaq, sizeof(*iaq)); iaq->ntxq10g = t4_ntxq10g; iaq->ntxq1g = t4_ntxq1g; iaq->ntxq_vi = t4_ntxq_vi; iaq->nrxq10g = nrxq10g = t4_nrxq10g; iaq->nrxq1g = nrxq1g = t4_nrxq1g; iaq->nrxq_vi = t4_nrxq_vi; iaq->rsrv_noflowq = t4_rsrv_noflowq; #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldtxq10g = t4_nofldtxq10g; iaq->nofldtxq1g = t4_nofldtxq1g; iaq->nofldtxq_vi = t4_nofldtxq_vi; iaq->nofldrxq10g = nofldrxq10g = t4_nofldrxq10g; iaq->nofldrxq1g = nofldrxq1g = t4_nofldrxq1g; iaq->nofldrxq_vi = t4_nofldrxq_vi; } #endif #ifdef DEV_NETMAP iaq->nnmtxq_vi = t4_nnmtxq_vi; iaq->nnmrxq_vi = t4_nnmrxq_vi; #endif for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & t4_intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) navail = pci_msix_count(sc->dev); else if (itype == INTR_MSI) navail = pci_msi_count(sc->dev); else navail = 1; restart: if (navail == 0) continue; iaq->intr_type = itype; iaq->intr_flags_10g = 0; iaq->intr_flags_1g = 0; /* * Best option: an interrupt vector for errors, one for the * firmware event queue, and one for every rxq (NIC and TOE) of * every VI. The VIs that support netmap use the same * interrupts for the NIC rx queues and the netmap rx queues * because only one set of queues is active at a time. */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g * (nrxq10g + nofldrxq10g); iaq->nirq += n1g * (nrxq1g + nofldrxq1g); iaq->nirq += (n10g + n1g) * (num_vis - 1) * max(iaq->nrxq_vi, iaq->nnmrxq_vi); /* See comment above. */ iaq->nirq += (n10g + n1g) * (num_vis - 1) * iaq->nofldrxq_vi; if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { iaq->intr_flags_10g = INTR_ALL; iaq->intr_flags_1g = INTR_ALL; goto allocate; } /* Disable the VIs (and netmap) if there aren't enough intrs */ if (num_vis > 1) { device_printf(sc->dev, "virtual interfaces disabled " "because num_vis=%u with current settings " "(nrxq10g=%u, nrxq1g=%u, nofldrxq10g=%u, " "nofldrxq1g=%u, nrxq_vi=%u nofldrxq_vi=%u, " "nnmrxq_vi=%u) would need %u interrupts but " "only %u are available.\n", num_vis, nrxq10g, nrxq1g, nofldrxq10g, nofldrxq1g, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, iaq->nirq, navail); num_vis = 1; iaq->ntxq_vi = iaq->nrxq_vi = 0; iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0; iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0; goto restart; } /* * Second best option: a vector for errors, one for the firmware * event queue, and vectors for either all the NIC rx queues or * all the TOE rx queues. The queues that don't get vectors * will forward their interrupts to those that do. */ iaq->nirq = T4_EXTRA_INTR; if (nrxq10g >= nofldrxq10g) { iaq->intr_flags_10g = INTR_RXQ; iaq->nirq += n10g * nrxq10g; } else { iaq->intr_flags_10g = INTR_OFLD_RXQ; iaq->nirq += n10g * nofldrxq10g; } if (nrxq1g >= nofldrxq1g) { iaq->intr_flags_1g = INTR_RXQ; iaq->nirq += n1g * nrxq1g; } else { iaq->intr_flags_1g = INTR_OFLD_RXQ; iaq->nirq += n1g * nofldrxq1g; } if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) goto allocate; /* * Next best option: an interrupt vector for errors, one for the * firmware event queue, and at least one per main-VI. At this * point we know we'll have to downsize nrxq and/or nofldrxq to * fit what's available to us. */ iaq->nirq = T4_EXTRA_INTR; iaq->nirq += n10g + n1g; if (iaq->nirq <= navail) { int leftover = navail - iaq->nirq; if (n10g > 0) { int target = max(nrxq10g, nofldrxq10g); iaq->intr_flags_10g = nrxq10g >= nofldrxq10g ? INTR_RXQ : INTR_OFLD_RXQ; n = 1; while (n < target && leftover >= n10g) { leftover -= n10g; iaq->nirq += n10g; n++; } iaq->nrxq10g = min(n, nrxq10g); #ifdef TCP_OFFLOAD iaq->nofldrxq10g = min(n, nofldrxq10g); #endif } if (n1g > 0) { int target = max(nrxq1g, nofldrxq1g); iaq->intr_flags_1g = nrxq1g >= nofldrxq1g ? INTR_RXQ : INTR_OFLD_RXQ; n = 1; while (n < target && leftover >= n1g) { leftover -= n1g; iaq->nirq += n1g; n++; } iaq->nrxq1g = min(n, nrxq1g); #ifdef TCP_OFFLOAD iaq->nofldrxq1g = min(n, nofldrxq1g); #endif } if (itype != INTR_MSI || powerof2(iaq->nirq)) goto allocate; } /* * Least desirable option: one interrupt vector for everything. */ iaq->nirq = iaq->nrxq10g = iaq->nrxq1g = 1; iaq->intr_flags_10g = iaq->intr_flags_1g = 0; #ifdef TCP_OFFLOAD if (is_offload(sc)) iaq->nofldrxq10g = iaq->nofldrxq1g = 1; #endif allocate: navail = iaq->nirq; rc = 0; if (itype == INTR_MSIX) rc = pci_alloc_msix(sc->dev, &navail); else if (itype == INTR_MSI) rc = pci_alloc_msi(sc->dev, &navail); if (rc == 0) { if (navail == iaq->nirq) return (0); /* * Didn't get the number requested. Use whatever number * the kernel is willing to allocate (it's in navail). */ device_printf(sc->dev, "fewer vectors than requested, " "type=%d, req=%d, rcvd=%d; will downshift req.\n", itype, iaq->nirq, navail); pci_release_msi(sc->dev); goto restart; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, navail); } device_printf(sc->dev, "failed to find a usable interrupt type. " "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } #define FW_VERSION(chip) ( \ V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \ V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \ V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \ V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD)) #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf) struct fw_info { uint8_t chip; char *kld_name; char *fw_mod_name; struct fw_hdr fw_hdr; /* XXX: waste of space, need a sparse struct */ } fw_info[] = { { .chip = CHELSIO_T4, .kld_name = "t4fw_cfg", .fw_mod_name = "t4fw", .fw_hdr = { .chip = FW_HDR_CHIP_T4, .fw_ver = htobe32_const(FW_VERSION(T4)), .intfver_nic = FW_INTFVER(T4, NIC), .intfver_vnic = FW_INTFVER(T4, VNIC), .intfver_ofld = FW_INTFVER(T4, OFLD), .intfver_ri = FW_INTFVER(T4, RI), .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T4, ISCSI), .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU), .intfver_fcoe = FW_INTFVER(T4, FCOE), }, }, { .chip = CHELSIO_T5, .kld_name = "t5fw_cfg", .fw_mod_name = "t5fw", .fw_hdr = { .chip = FW_HDR_CHIP_T5, .fw_ver = htobe32_const(FW_VERSION(T5)), .intfver_nic = FW_INTFVER(T5, NIC), .intfver_vnic = FW_INTFVER(T5, VNIC), .intfver_ofld = FW_INTFVER(T5, OFLD), .intfver_ri = FW_INTFVER(T5, RI), .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T5, ISCSI), .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU), .intfver_fcoe = FW_INTFVER(T5, FCOE), }, } }; static struct fw_info * find_fw_info(int chip) { int i; for (i = 0; i < nitems(fw_info); i++) { if (fw_info[i].chip == chip) return (&fw_info[i]); } return (NULL); } /* * Is the given firmware API compatible with the one the driver was compiled * with? */ static int fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2) { /* short circuit if it's the exact same firmware version */ if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver) return (1); /* * XXX: Is this too conservative? Perhaps I should limit this to the * features that are supported in the driver. */ #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x) if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) && SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) && SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe)) return (1); #undef SAME_INTF return (0); } /* * The firmware in the KLD is usable, but should it be installed? This routine * explains itself in detail if it indicates the KLD firmware should be * installed. */ static int should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c) { const char *reason; if (!card_fw_usable) { reason = "incompatible or unusable"; goto install; } if (k > c) { reason = "older than the version bundled with this driver"; goto install; } if (t4_fw_install == 2 && k != c) { reason = "different than the version bundled with this driver"; goto install; } return (0); install: if (t4_fw_install == 0) { device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "but the driver is prohibited from installing a different " "firmware on the card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason); return (0); } device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "installing firmware %u.%u.%u.%u on card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason, G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); return (1); } /* * Establish contact with the firmware and determine if we are the master driver * or not, and whether we are responsible for chip initialization. */ static int prep_firmware(struct adapter *sc) { const struct firmware *fw = NULL, *default_cfg; int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1; enum dev_state state; struct fw_info *fw_info; struct fw_hdr *card_fw; /* fw on the card */ const struct fw_hdr *kld_fw; /* fw in the KLD */ const struct fw_hdr *drv_fw; /* fw header the driver was compiled against */ /* Contact firmware. */ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state); if (rc < 0 || state == DEV_STATE_ERR) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d, %d.\n", rc, state); return (rc); } pf = rc; if (pf == sc->mbox) sc->flags |= MASTER_PF; else if (state == DEV_STATE_UNINIT) { /* * We didn't get to be the master so we definitely won't be * configuring the chip. It's a bug if someone else hasn't * configured it already. */ device_printf(sc->dev, "couldn't be master(%d), " "device not already initialized either(%d).\n", rc, state); return (EDOOFUS); } /* This is the firmware whose headers the driver was compiled against */ fw_info = find_fw_info(chip_id(sc)); if (fw_info == NULL) { device_printf(sc->dev, "unable to look up firmware information for chip %d.\n", chip_id(sc)); return (EINVAL); } drv_fw = &fw_info->fw_hdr; /* * The firmware KLD contains many modules. The KLD name is also the * name of the module that contains the default config file. */ default_cfg = firmware_get(fw_info->kld_name); /* Read the header of the firmware on the card */ card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_read_flash(sc, FLASH_FW_START, sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1); if (rc == 0) card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw); else { device_printf(sc->dev, "Unable to read card's firmware header: %d\n", rc); card_fw_usable = 0; } /* This is the firmware in the KLD */ fw = firmware_get(fw_info->fw_mod_name); if (fw != NULL) { kld_fw = (const void *)fw->data; kld_fw_usable = fw_compatible(drv_fw, kld_fw); } else { kld_fw = NULL; kld_fw_usable = 0; } if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver && (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) { /* * Common case: the firmware on the card is an exact match and * the KLD is an exact match too, or the KLD is * absent/incompatible. Note that t4_fw_install = 2 is ignored * here -- use cxgbetool loadfw if you want to reinstall the * same firmware as the one on the card. */ } else if (kld_fw_usable && state == DEV_STATE_UNINIT && should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver), be32toh(card_fw->fw_ver))) { rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0); if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); goto done; } /* Installed successfully, update the cached header too. */ memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; /* already reset as part of load_fw */ } if (!card_fw_usable) { uint32_t d, c, k; d = ntohl(drv_fw->fw_ver); c = ntohl(card_fw->fw_ver); k = kld_fw ? ntohl(kld_fw->fw_ver) : 0; device_printf(sc->dev, "Cannot find a usable firmware: " "fw_install %d, chip state %d, " "driver compiled with %d.%d.%d.%d, " "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n", t4_fw_install, state, G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d), G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); rc = EINVAL; goto done; } /* We're using whatever's on the card and it's known to be good. */ sc->params.fw_vers = ntohl(card_fw->fw_ver); snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers)); t4_get_tp_version(sc, &sc->params.tp_vers); snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers), G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers)); if (t4_get_exprom_version(sc, &sc->params.exprom_vers) != 0) sc->params.exprom_vers = 0; else { snprintf(sc->exprom_version, sizeof(sc->exprom_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.exprom_vers), G_FW_HDR_FW_VER_MINOR(sc->params.exprom_vers), G_FW_HDR_FW_VER_MICRO(sc->params.exprom_vers), G_FW_HDR_FW_VER_BUILD(sc->params.exprom_vers)); } /* Reset device */ if (need_fw_reset && (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); goto done; } sc->flags |= FW_OK; rc = get_params__pre_init(sc); if (rc != 0) goto done; /* error message displayed already */ /* Partition adapter resources as specified in the config file. */ if (state == DEV_STATE_UNINIT) { KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); rc = partition_resources(sc, default_cfg, fw_info->kld_name); if (rc != 0) goto done; /* error message displayed already */ t4_tweak_chip_settings(sc); /* get basic stuff going */ rc = -t4_fw_initialize(sc, sc->mbox); if (rc != 0) { device_printf(sc->dev, "fw init failed: %d.\n", rc); goto done; } } else { snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf); sc->cfcsum = 0; } done: free(card_fw, M_CXGBE); if (fw != NULL) firmware_put(fw, FIRMWARE_UNLOAD); if (default_cfg != NULL) firmware_put(default_cfg, FIRMWARE_UNLOAD); return (rc); } #define FW_PARAM_DEV(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) #define FW_PARAM_PFVF(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) /* * Partition chip resources for use between various PFs, VFs, etc. */ static int partition_resources(struct adapter *sc, const struct firmware *default_cfg, const char *name_prefix) { const struct firmware *cfg = NULL; int rc = 0; struct fw_caps_config_cmd caps; uint32_t mtype, moff, finicsum, cfcsum; /* * Figure out what configuration file to use. Pick the default config * file for the card if the user hasn't specified one explicitly. */ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file); if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) { /* Card specific overrides go here. */ if (pci_get_device(sc->dev) == 0x440a) snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF); if (is_fpga(sc)) snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF); } /* * We need to load another module if the profile is anything except * "default" or "flash". */ if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 && strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { char s[32]; snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file); cfg = firmware_get(s); if (cfg == NULL) { if (default_cfg != NULL) { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the default config file instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", DEFAULT_CF); } else { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the config file on the card's flash " "instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } } } if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 && default_cfg == NULL) { device_printf(sc->dev, "default config file not available, will use the config " "file on the card's flash instead.\n"); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { u_int cflen; const uint32_t *cfdata; uint32_t param, val, addr; KASSERT(cfg != NULL || default_cfg != NULL, ("%s: no config to upload", __func__)); /* * Ask the firmware where it wants us to upload the config file. */ param = FW_PARAM_DEV(CF); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { /* No support for config file? Shouldn't happen. */ device_printf(sc->dev, "failed to query config file location: %d.\n", rc); goto done; } mtype = G_FW_PARAMS_PARAM_Y(val); moff = G_FW_PARAMS_PARAM_Z(val) << 16; /* * XXX: sheer laziness. We deliberately added 4 bytes of * useless stuffing/comments at the end of the config file so * it's ok to simply throw away the last remaining bytes when * the config file is not an exact multiple of 4. This also * helps with the validate_mt_off_len check. */ if (cfg != NULL) { cflen = cfg->datasize & ~3; cfdata = cfg->data; } else { cflen = default_cfg->datasize & ~3; cfdata = default_cfg->data; } if (cflen > FLASH_CFG_MAX_SIZE) { device_printf(sc->dev, "config file too long (%d, max allowed is %d). " "Will try to use the config on the card, if any.\n", cflen, FLASH_CFG_MAX_SIZE); goto use_config_on_flash; } rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr); if (rc != 0) { device_printf(sc->dev, "%s: addr (%d/0x%x) or len %d is not valid: %d. " "Will try to use the config on the card, if any.\n", __func__, mtype, moff, cflen, rc); goto use_config_on_flash; } write_via_memwin(sc, 2, addr, cfdata, cflen); } else { use_config_on_flash: mtype = FW_MEMTYPE_FLASH; moff = t4_flash_cfg_addr(sc); } bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID | V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to pre-process config file: %d " "(mtype %d, moff 0x%x).\n", rc, mtype, moff); goto done; } finicsum = be32toh(caps.finicsum); cfcsum = be32toh(caps.cfcsum); if (finicsum != cfcsum) { device_printf(sc->dev, "WARNING: config file checksum mismatch: %08x %08x\n", finicsum, cfcsum); } sc->cfcsum = cfcsum; #define LIMIT_CAPS(x) do { \ caps.x &= htobe16(t4_##x##_allowed); \ } while (0) /* * Let the firmware know what features will (not) be used so it can tune * things accordingly. */ LIMIT_CAPS(nbmcaps); LIMIT_CAPS(linkcaps); LIMIT_CAPS(switchcaps); LIMIT_CAPS(niccaps); LIMIT_CAPS(toecaps); LIMIT_CAPS(rdmacaps); LIMIT_CAPS(tlscaps); LIMIT_CAPS(iscsicaps); LIMIT_CAPS(fcoecaps); #undef LIMIT_CAPS caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL); if (rc != 0) { device_printf(sc->dev, "failed to process config file: %d.\n", rc); } done: if (cfg != NULL) firmware_put(cfg, FIRMWARE_UNLOAD); return (rc); } /* * Retrieve parameters that are needed (or nice to have) very early. */ static int get_params__pre_init(struct adapter *sc) { int rc; uint32_t param[2], val[2]; param[0] = FW_PARAM_DEV(PORTVEC); param[1] = FW_PARAM_DEV(CCLK); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (pre_init): %d.\n", rc); return (rc); } sc->params.portvec = val[0]; sc->params.nports = bitcount32(val[0]); sc->params.vpd.cclk = val[1]; /* Read device log parameters. */ rc = -t4_init_devlog_params(sc, 1); if (rc == 0) fixup_devlog_params(sc); else { device_printf(sc->dev, "failed to get devlog parameters: %d.\n", rc); rc = 0; /* devlog isn't critical for device operation */ } return (rc); } /* * Retrieve various parameters that are of interest to the driver. The device * has been initialized by the firmware at this point. */ static int get_params__post_init(struct adapter *sc) { int rc; uint32_t param[7], val[7]; struct fw_caps_config_cmd caps; param[0] = FW_PARAM_PFVF(IQFLINT_START); param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); param[4] = FW_PARAM_PFVF(L2T_START); param[5] = FW_PARAM_PFVF(L2T_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); return (rc); } sc->sge.iq_start = val[0]; sc->sge.eq_start = val[1]; sc->tids.ftid_base = val[2]; sc->tids.nftids = val[3] - val[2] + 1; sc->params.ftid_min = val[2]; sc->params.ftid_max = val[3]; sc->vres.l2t.start = val[4]; sc->vres.l2t.size = val[5] - val[4] + 1; KASSERT(sc->vres.l2t.size <= L2T_SIZE, ("%s: L2 table size (%u) larger than expected (%u)", __func__, sc->vres.l2t.size, L2T_SIZE)); /* get capabilites */ bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to get card capabilities: %d.\n", rc); return (rc); } #define READ_CAPS(x) do { \ sc->x = htobe16(caps.x); \ } while (0) READ_CAPS(nbmcaps); READ_CAPS(linkcaps); READ_CAPS(switchcaps); READ_CAPS(niccaps); READ_CAPS(toecaps); READ_CAPS(rdmacaps); READ_CAPS(tlscaps); READ_CAPS(iscsicaps); READ_CAPS(fcoecaps); if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) { param[0] = FW_PARAM_PFVF(ETHOFLD_START); param[1] = FW_PARAM_PFVF(ETHOFLD_END); param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query NIC parameters: %d.\n", rc); return (rc); } sc->tids.etid_base = val[0]; sc->params.etid_min = val[0]; sc->tids.netids = val[1] - val[0] + 1; sc->params.netids = sc->tids.netids; sc->params.eo_wr_cred = val[2]; sc->params.ethoffload = 1; } if (sc->toecaps) { /* query offload-related parameters */ param[0] = FW_PARAM_DEV(NTID); param[1] = FW_PARAM_PFVF(SERVER_START); param[2] = FW_PARAM_PFVF(SERVER_END); param[3] = FW_PARAM_PFVF(TDDP_START); param[4] = FW_PARAM_PFVF(TDDP_END); param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); return (rc); } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); sc->tids.stid_base = val[1]; sc->tids.nstids = val[2] - val[1] + 1; sc->vres.ddp.start = val[3]; sc->vres.ddp.size = val[4] - val[3] + 1; sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; } if (sc->rdmacaps) { param[0] = FW_PARAM_PFVF(STAG_START); param[1] = FW_PARAM_PFVF(STAG_END); param[2] = FW_PARAM_PFVF(RQ_START); param[3] = FW_PARAM_PFVF(RQ_END); param[4] = FW_PARAM_PFVF(PBL_START); param[5] = FW_PARAM_PFVF(PBL_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(1): %d.\n", rc); return (rc); } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; sc->vres.rq.start = val[2]; sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SQRQ_START); param[1] = FW_PARAM_PFVF(SQRQ_END); param[2] = FW_PARAM_PFVF(CQ_START); param[3] = FW_PARAM_PFVF(CQ_END); param[4] = FW_PARAM_PFVF(OCQ_START); param[5] = FW_PARAM_PFVF(OCQ_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(2): %d.\n", rc); return (rc); } sc->vres.qp.start = val[0]; sc->vres.qp.size = val[1] - val[0] + 1; sc->vres.cq.start = val[2]; sc->vres.cq.size = val[3] - val[2] + 1; sc->vres.ocq.start = val[4]; sc->vres.ocq.size = val[5] - val[4] + 1; } if (sc->iscsicaps) { param[0] = FW_PARAM_PFVF(ISCSI_START); param[1] = FW_PARAM_PFVF(ISCSI_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); return (rc); } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } /* * We've got the params we wanted to query via the firmware. Now grab * some others directly from the chip. */ rc = t4_read_chip_settings(sc); return (rc); } static int set_params__post_init(struct adapter *sc) { uint32_t param, val; /* ask for encapsulated CPLs */ param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP); val = 1; (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); return (0); } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; snprintf(buf, sizeof(buf), "Chelsio %s %sNIC (rev %d), S/N:%s, " "P/N:%s, E/C:%s", p->vpd.id, is_offload(sc) ? "R" : "", chip_rev(sc), p->vpd.sn, p->vpd.pn, p->vpd.ec); device_set_desc_copy(sc->dev, buf); } static void build_medialist(struct port_info *pi, struct ifmedia *media) { int m; PORT_LOCK(pi); ifmedia_removeall(media); m = IFM_ETHER | IFM_FDX; switch(pi->port_type) { case FW_PORT_TYPE_BT_XFI: case FW_PORT_TYPE_BT_XAUI: ifmedia_add(media, m | IFM_10G_T, 0, NULL); /* fall through */ case FW_PORT_TYPE_BT_SGMII: ifmedia_add(media, m | IFM_1000_T, 0, NULL); ifmedia_add(media, m | IFM_100_TX, 0, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); break; case FW_PORT_TYPE_CX4: ifmedia_add(media, m | IFM_10G_CX4, 0, NULL); ifmedia_set(media, m | IFM_10G_CX4); break; case FW_PORT_TYPE_QSFP_10G: case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_10G_LR, 0, NULL); ifmedia_set(media, m | IFM_10G_LR); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_10G_SR, 0, NULL); ifmedia_set(media, m | IFM_10G_SR); break; case FW_PORT_MOD_TYPE_LRM: ifmedia_add(media, m | IFM_10G_LRM, 0, NULL); ifmedia_set(media, m | IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL); ifmedia_set(media, m | IFM_10G_TWINAX); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; case FW_PORT_MOD_TYPE_NA: case FW_PORT_MOD_TYPE_ER: default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_40G_LR4, 0, NULL); ifmedia_set(media, m | IFM_40G_LR4); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_40G_SR4, 0, NULL); ifmedia_set(media, m | IFM_40G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_40G_CR4, 0, NULL); ifmedia_set(media, m | IFM_40G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } PORT_UNLOCK(pi); } #define FW_MAC_EXACT_CHUNK 7 /* * Program the port's XGMAC based on parameters in ifnet. The caller also * indicates which parameters should be programmed (the rest are left alone). */ int update_mac_settings(struct ifnet *ifp, int flags) { int rc = 0; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) mtu = ifp->if_mtu; if (flags & XGMAC_PROMISC) promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0; if (flags & XGMAC_ALLMULTI) allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; if (flags & XGMAC_VLANEX) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0; if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) { rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc, allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); return (rc); } } if (flags & XGMAC_UCADDR) { uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt, ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { vi->xact_addr_filt = rc; rc = 0; } } if (flags & XGMAC_MCADDRS) { const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK]; int del = 1; uint64_t hash = 0; struct ifmultiaddr *ifma; int i = 0, j; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcaddr[i] = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); MPASS(ETHER_IS_MULTICAST(mcaddr[i])); i++; if (i == FW_MAC_EXACT_CHUNK) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } del = 0; i = 0; } } if (i > 0) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } } rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: if_maddr_runlock(ifp); } return (rc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ int begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags, char *wmesg) { int rc, pri; #ifdef WITNESS /* the caller thinks it's ok to sleep, but is it really? */ if (flags & SLEEP_OK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "begin_synchronized_op"); #endif if (INTR_OK) pri = PCATCH; else pri = 0; ADAPTER_LOCK(sc); for (;;) { if (vi && IS_DOOMED(vi)) { rc = ENXIO; goto done; } if (!IS_BUSY(sc)) { rc = 0; break; } if (!(flags & SLEEP_OK)) { rc = EBUSY; goto done; } if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) { rc = EINTR; goto done; } } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = wmesg; sc->last_op_thr = curthread; sc->last_op_flags = flags; #endif done: if (!(flags & HOLD_LOCK) || rc) ADAPTER_UNLOCK(sc); return (rc); } /* * Tell if_ioctl and if_init that the VI is going away. This is * special variant of begin_synchronized_op and must be paired with a * call to end_synchronized_op. */ void doom_vi(struct adapter *sc, struct vi_info *vi) { ADAPTER_LOCK(sc); SET_DOOMED(vi); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = "t4detach"; sc->last_op_thr = curthread; sc->last_op_flags = 0; #endif ADAPTER_UNLOCK(sc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ void end_synchronized_op(struct adapter *sc, int flags) { if (flags & LOCK_HELD) ADAPTER_LOCK_ASSERT_OWNED(sc); else ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup(&sc->flags); ADAPTER_UNLOCK(sc); } static int cxgbe_init_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc = 0, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); /* already running */ if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ if (!(vi->flags & VI_INIT_DONE) && ((rc = vi_full_init(vi)) != 0)) return (rc); /* error message displayed already */ rc = update_mac_settings(ifp, XGMAC_ALL); if (rc) goto done; /* error message displayed already */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true); if (rc != 0) { if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; } /* * Can't fail from this point onwards. Review cxgbe_uninit_synchronized * if this changes. */ for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_ENABLED; TXQ_UNLOCK(txq); } /* * The first iq of the first port to come up is used for tracing. */ if (sc->traceq < 0 && IS_MAIN_VI(vi)) { sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id; t4_write_reg(sc, is_t4(sc) ? A_MPS_TRC_RSS_CONTROL : A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) | V_QUEUENUMBER(sc->traceq)); pi->flags |= HAS_TRACEQ; } /* all ok */ PORT_LOCK(pi); ifp->if_drv_flags |= IFF_DRV_RUNNING; pi->up_vis++; if (pi->nvi > 1) callout_reset(&vi->tick, hz, vi_tick, vi); else callout_reset(&pi->tick, hz, cxgbe_tick, pi); PORT_UNLOCK(pi); done: if (rc != 0) cxgbe_uninit_synchronized(vi); return (rc); } /* * Idempotent. */ static int cxgbe_uninit_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (!(vi->flags & VI_INIT_DONE)) { KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING), ("uninited VI is running")); return (0); } /* * Disable the VI so that all its data in either direction is discarded * by the MPS. Leave everything else (the queues, interrupts, and 1Hz * tick) intact as the TP can deliver negative advice or data that it's * holding in its RAM (for an offloaded connection) even after the VI is * disabled. */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false); if (rc) { if_printf(ifp, "disable_vi failed: %d\n", rc); return (rc); } for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); } PORT_LOCK(pi); if (pi->nvi == 1) callout_stop(&pi->tick); else callout_stop(&vi->tick); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(pi); return (0); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; pi->up_vis--; if (pi->up_vis > 0) { PORT_UNLOCK(pi); return (0); } PORT_UNLOCK(pi); pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; pi->linkdnrc = -1; t4_os_link_changed(sc, pi->port_id, 0, -1); return (0); } /* * It is ok for this function to fail midway and return right away. t4_detach * will walk the entire sc->irq list and clean up whatever is valid. */ static int setup_intr_handlers(struct adapter *sc) { int rc, rid, p, q, v; char s[8]; struct irq *irq; struct port_info *pi; struct vi_info *vi; struct sge *sge = &sc->sge; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; #endif #ifdef RSS int nbuckets = rss_getnumbuckets(); #endif /* * Setup interrupts. */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 0 : 1; if (sc->intr_count == 1) return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all")); /* Multiple interrupts. */ KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); /* The first one is always error intr */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); if (rc != 0) return (rc); irq++; rid++; /* The second one is always the firmware event queue */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt"); if (rc != 0) return (rc); irq++; rid++; for_each_port(sc, p) { pi = sc->port[p]; for_each_vi(pi, v, vi) { vi->first_intr = rid - 1; if (vi->nnmrxq > 0) { int n = max(vi->nrxq, vi->nnmrxq); MPASS(vi->flags & INTR_RXQ); rxq = &sge->rxq[vi->first_rxq]; #ifdef DEV_NETMAP nm_rxq = &sge->nm_rxq[vi->first_nm_rxq]; #endif for (q = 0; q < n; q++) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); if (q < vi->nrxq) irq->rxq = rxq++; #ifdef DEV_NETMAP if (q < vi->nnmrxq) irq->nm_rxq = nm_rxq++; #endif rc = t4_alloc_irq(sc, irq, rid, t4_vi_intr, irq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } } else if (vi->flags & INTR_RXQ) { for_each_rxq(vi, q, rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq, s); if (rc != 0) return (rc); #ifdef RSS bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); #endif irq++; rid++; vi->nintr++; } } #ifdef TCP_OFFLOAD if (vi->flags & INTR_OFLD_RXQ) { for_each_ofld_rxq(vi, q, ofld_rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, ofld_rxq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } } #endif } } MPASS(irq == &sc->irq[sc->intr_count]); return (0); } int adapter_full_init(struct adapter *sc) { int rc, i; ASSERT_SYNCHRONIZED_OP(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); KASSERT((sc->flags & FULL_INIT_DONE) == 0, ("%s: FULL_INIT_DONE already", __func__)); /* * queues that belong to the adapter (not any particular port). */ rc = t4_setup_adapter_queues(sc); if (rc != 0) goto done; for (i = 0; i < nitems(sc->tq); i++) { sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq[i]); if (sc->tq[i] == NULL) { device_printf(sc->dev, "failed to allocate task queue %d\n", i); rc = ENOMEM; goto done; } taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d", device_get_nameunit(sc->dev), i); } t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) adapter_full_uninit(sc); return (rc); } int adapter_full_uninit(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); t4_teardown_adapter_queues(sc); for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) { taskqueue_free(sc->tq[i]); sc->tq[i] = NULL; } sc->flags &= ~FULL_INIT_DONE; return (0); } #ifdef RSS #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \ RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \ RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \ RSS_HASHTYPE_RSS_UDP_IPV6) /* Translates kernel hash types to hardware. */ static int hashconfig_to_hashen(int hashconfig) { int hashen = 0; if (hashconfig & RSS_HASHTYPE_RSS_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; return (hashen); } /* Translates hardware hash types to kernel. */ static int hashen_to_hashconfig(int hashen) { int hashconfig = 0; if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) { /* * If UDP hashing was enabled it must have been enabled for * either IPv4 or IPv6 (inclusive or). Enabling UDP without * enabling any 4-tuple hash is nonsense configuration. */ MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)); if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6; } if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV6; return (hashconfig); } #endif int vi_full_init(struct vi_info *vi) { struct adapter *sc = vi->pi->adapter; struct ifnet *ifp = vi->ifp; uint16_t *rss; struct sge_rxq *rxq; int rc, i, j, hashen; #ifdef RSS int nbuckets = rss_getnumbuckets(); int hashconfig = rss_gethashconfig(); int extra; uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; #endif ASSERT_SYNCHRONIZED_OP(sc); KASSERT((vi->flags & VI_INIT_DONE) == 0, ("%s: VI_INIT_DONE already", __func__)); sysctl_ctx_init(&vi->ctx); vi->flags |= VI_SYSCTL_CTX; /* * Allocate tx/rx/fl queues for this VI. */ rc = t4_setup_vi_queues(vi); if (rc != 0) goto done; /* error message displayed already */ /* * Setup RSS for this VI. Save a copy of the RSS table for later use. */ if (vi->nrxq > vi->rss_size) { if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); " "some queues will never receive traffic.\n", vi->nrxq, vi->rss_size); } else if (vi->rss_size % vi->nrxq) { if_printf(ifp, "nrxq (%d), hw RSS table size (%d); " "expect uneven traffic distribution.\n", vi->nrxq, vi->rss_size); } #ifdef RSS MPASS(RSS_KEYSIZE == 40); if (vi->nrxq != nbuckets) { if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);" "performance will be impacted.\n", vi->nrxq, nbuckets); } rss_getkey((void *)&raw_rss_key[0]); for (i = 0; i < nitems(rss_key); i++) { rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]); } t4_write_rss_key(sc, &rss_key[0], -1); #endif rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); for (i = 0; i < vi->rss_size;) { #ifdef RSS j = rss_get_indirection_to_bucket(i); j %= vi->nrxq; rxq = &sc->sge.rxq[vi->first_rxq + j]; rss[i++] = rxq->iq.abs_id; #else for_each_rxq(vi, j, rxq) { rss[i++] = rxq->iq.abs_id; if (i == vi->rss_size) break; } #endif } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, vi->rss_size); if (rc != 0) { if_printf(ifp, "rss_config failed: %d\n", rc); goto done; } #ifdef RSS hashen = hashconfig_to_hashen(hashconfig); /* * We may have had to enable some hashes even though the global config * wants them disabled. This is a potential problem that must be * reported to the user. */ extra = hashen_to_hashconfig(hashen) ^ hashconfig; /* * If we consider only the supported hash types, then the enabled hashes * are a superset of the requested hashes. In other words, there cannot * be any supported hash that was requested but not enabled, but there * can be hashes that were not requested but had to be enabled. */ extra &= SUPPORTED_RSS_HASHTYPES; MPASS((extra & hashconfig) == 0); if (extra) { if_printf(ifp, "global RSS config (0x%x) cannot be accommodated.\n", hashconfig); } if (extra & RSS_HASHTYPE_RSS_IPV4) if_printf(ifp, "IPv4 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV4) if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_IPV6) if_printf(ifp, "IPv6 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV6) if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV4) if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV6) if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n"); #else hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN; #endif rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0]); if (rc != 0) { if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc); goto done; } vi->rss = rss; vi->flags |= VI_INIT_DONE; done: if (rc != 0) vi_full_uninit(vi); return (rc); } /* * Idempotent. */ int vi_full_uninit(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int i; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif if (vi->flags & VI_INIT_DONE) { /* Need to quiesce queues. */ /* XXX: Only for the first VI? */ if (IS_MAIN_VI(vi)) quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { quiesce_txq(sc, txq); } #ifdef TCP_OFFLOAD for_each_ofld_txq(vi, i, ofld_txq) { quiesce_wrq(sc, ofld_txq); } #endif for_each_rxq(vi, i, rxq) { quiesce_iq(sc, &rxq->iq); quiesce_fl(sc, &rxq->fl); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { quiesce_iq(sc, &ofld_rxq->iq); quiesce_fl(sc, &ofld_rxq->fl); } #endif free(vi->rss, M_CXGBE); free(vi->nm_rss, M_CXGBE); } t4_teardown_vi_queues(vi); vi->flags &= ~VI_INIT_DONE; return (0); } static void quiesce_txq(struct adapter *sc, struct sge_txq *txq) { struct sge_eq *eq = &txq->eq; struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; (void) sc; /* unused */ #ifdef INVARIANTS TXQ_LOCK(txq); MPASS((eq->flags & EQ_ENABLED) == 0); TXQ_UNLOCK(txq); #endif /* Wait for the mp_ring to empty. */ while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("rquiesce", 1); } /* Then wait for the hardware to finish. */ while (spg->cidx != htobe16(eq->pidx)) pause("equiesce", 1); /* Finally, wait for the driver to reclaim all descriptors. */ while (eq->cidx != eq->pidx) pause("dquiesce", 1); } static void quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq) { /* XXXTX */ } static void quiesce_iq(struct adapter *sc, struct sge_iq *iq) { (void) sc; /* unused */ /* Synchronize with the interrupt handler */ while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED)) pause("iqfree", 1); } static void quiesce_fl(struct adapter *sc, struct sge_fl *fl) { mtx_lock(&sc->sfl_lock); FL_LOCK(fl); fl->flags |= FL_DOOMED; FL_UNLOCK(fl); callout_stop(&sc->sfl_callout); mtx_unlock(&sc->sfl_lock); KASSERT((fl->flags & FL_STARVING) == 0, ("%s: still starving", __func__)); } static int t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid, driver_intr_t *handler, void *arg, char *name) { int rc; irq->rid = rid; irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid, RF_SHAREABLE | RF_ACTIVE); if (irq->res == NULL) { device_printf(sc->dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET, NULL, handler, arg, &irq->tag); if (rc != 0) { device_printf(sc->dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name, rc); } else if (name) bus_describe_intr(sc->dev, irq->res, irq->tag, name); return (rc); } static int t4_free_irq(struct adapter *sc, struct irq *irq) { if (irq->tag) bus_teardown_intr(sc->dev, irq->res, irq->tag); if (irq->res) bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res); bzero(irq, sizeof(*irq)); return (0); } static void get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) { regs->version = chip_id(sc) | chip_rev(sc) << 10; t4_get_regs(sc, buf, regs->len); } #define A_PL_INDIR_CMD 0x1f8 #define S_PL_AUTOINC 31 #define M_PL_AUTOINC 0x1U #define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC) #define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC) #define S_PL_VFID 20 #define M_PL_VFID 0xffU #define V_PL_VFID(x) ((x) << S_PL_VFID) #define G_PL_VFID(x) (((x) >> S_PL_VFID) & M_PL_VFID) #define S_PL_ADDR 0 #define M_PL_ADDR 0xfffffU #define V_PL_ADDR(x) ((x) << S_PL_ADDR) #define G_PL_ADDR(x) (((x) >> S_PL_ADDR) & M_PL_ADDR) #define A_PL_INDIR_DATA 0x1fc static uint64_t read_vf_stat(struct adapter *sc, unsigned int viid, int reg) { u32 stats[2]; mtx_assert(&sc->reg_lock, MA_OWNED); t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg))); stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); return (((uint64_t)stats[1]) << 32 | stats[0]); } static void t4_get_vi_stats(struct adapter *sc, unsigned int viid, struct fw_vi_stats_vf *stats) { #define GET_STAT(name) \ read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L) stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES); stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES); stats->tx_mcast_bytes = GET_STAT(TX_VF_MCAST_BYTES); stats->tx_mcast_frames = GET_STAT(TX_VF_MCAST_FRAMES); stats->tx_ucast_bytes = GET_STAT(TX_VF_UCAST_BYTES); stats->tx_ucast_frames = GET_STAT(TX_VF_UCAST_FRAMES); stats->tx_drop_frames = GET_STAT(TX_VF_DROP_FRAMES); stats->tx_offload_bytes = GET_STAT(TX_VF_OFFLOAD_BYTES); stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES); stats->rx_bcast_bytes = GET_STAT(RX_VF_BCAST_BYTES); stats->rx_bcast_frames = GET_STAT(RX_VF_BCAST_FRAMES); stats->rx_mcast_bytes = GET_STAT(RX_VF_MCAST_BYTES); stats->rx_mcast_frames = GET_STAT(RX_VF_MCAST_FRAMES); stats->rx_ucast_bytes = GET_STAT(RX_VF_UCAST_BYTES); stats->rx_ucast_frames = GET_STAT(RX_VF_UCAST_FRAMES); stats->rx_err_frames = GET_STAT(RX_VF_ERR_FRAMES); #undef GET_STAT } static void t4_clr_vi_stats(struct adapter *sc, unsigned int viid) { int reg; t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L))); for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) t4_write_reg(sc, A_PL_INDIR_DATA, 0); } static void vi_refresh_stats(struct adapter *sc, struct vi_info *vi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ if (!(vi->flags & VI_INIT_DONE)) return; getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &vi->last_refreshed, <)) return; mtx_lock(&sc->reg_lock); t4_get_vi_stats(sc, vi->viid, &vi->stats); getmicrotime(&vi->last_refreshed); mtx_unlock(&sc->reg_lock); } static void cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi) { int i; u_int v, tnl_cong_drops; struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; tnl_cong_drops = 0; t4_get_port_stats(sc, pi->tx_chan, &pi->stats); for (i = 0; i < sc->chip_params->nchan; i++) { if (pi->rx_chan_map & (1 << i)) { mtx_lock(&sc->reg_lock); t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); mtx_unlock(&sc->reg_lock); tnl_cong_drops += v; } } pi->tnl_cong_drops = tnl_cong_drops; getmicrotime(&pi->last_refreshed); } static void cxgbe_tick(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; PORT_LOCK_ASSERT_OWNED(pi); cxgbe_refresh_stats(sc, pi); callout_schedule(&pi->tick, hz); } void vi_tick(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; vi_refresh_stats(sc, vi); callout_schedule(&vi->tick, hz); } static void cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid) { struct ifnet *vlan; if (arg != ifp || ifp->if_type != IFT_ETHER) return; vlan = VLAN_DEVAT(ifp, vid); VLAN_SETCOOKIE(vlan, ifp); } static int cpl_not_handled(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { #ifdef INVARIANTS panic("%s: opcode 0x%02x on iq %p with payload %p", __func__, rss->opcode, iq, m); #else log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n", __func__, rss->opcode, iq, m); m_freem(m); #endif return (EDOOFUS); } int t4_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h) { uintptr_t *loc, new; if (opcode >= nitems(sc->cpl_handler)) return (EINVAL); new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled; loc = (uintptr_t *) &sc->cpl_handler[opcode]; atomic_store_rel_ptr(loc, new); return (0); } static int an_not_handled(struct sge_iq *iq, const struct rsp_ctrl *ctrl) { #ifdef INVARIANTS panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl); #else log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n", __func__, iq, ctrl); #endif return (EDOOFUS); } int t4_register_an_handler(struct adapter *sc, an_handler_t h) { uintptr_t *loc, new; new = h ? (uintptr_t)h : (uintptr_t)an_not_handled; loc = (uintptr_t *) &sc->an_handler; atomic_store_rel_ptr(loc, new); return (0); } static int fw_msg_not_handled(struct adapter *sc, const __be64 *rpl) { const struct cpl_fw6_msg *cpl = __containerof(rpl, struct cpl_fw6_msg, data[0]); #ifdef INVARIANTS panic("%s: fw_msg type %d", __func__, cpl->type); #else log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type); #endif return (EDOOFUS); } int t4_register_fw_msg_handler(struct adapter *sc, int type, fw_msg_handler_t h) { uintptr_t *loc, new; if (type >= nitems(sc->fw_msg_handler)) return (EINVAL); /* * These are dispatched by the handler for FW{4|6}_CPL_MSG using the CPL * handler dispatch table. Reject any attempt to install a handler for * this subtype. */ if (type == FW_TYPE_RSSCPL || type == FW6_TYPE_RSSCPL) return (EINVAL); new = h ? (uintptr_t)h : (uintptr_t)fw_msg_not_handled; loc = (uintptr_t *) &sc->fw_msg_handler[type]; atomic_store_rel_ptr(loc, new); return (0); } /* * Should match fw_caps_config_ enums in t4fw_interface.h */ static char *caps_decoder[] = { "\20\001IPMI\002NCSI", /* 0: NBM */ "\20\001PPP\002QFC\003DCBX", /* 1: link */ "\20\001INGRESS\002EGRESS", /* 2: switch */ "\20\001NIC\002VM\003IDS\004UM\005UM_ISGL" /* 3: NIC */ "\006HASHFILTER\007ETHOFLD", "\20\001TOE", /* 4: TOE */ "\20\001RDDP\002RDMAC", /* 5: RDMA */ "\20\001INITIATOR_PDU\002TARGET_PDU" /* 6: iSCSI */ "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD" "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD" "\007T10DIF" "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD", "\20\00KEYS", /* 7: TLS */ "\20\001INITIATOR\002TARGET\003CTRL_OFLD" /* 8: FCoE */ "\004PO_INITIATOR\005PO_TARGET", }; static void t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *c0; static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"}; ctx = device_get_sysctl_ctx(sc->dev); /* * dev.t4nex.X. */ oid = device_get_sysctl_tree(sc->dev); c0 = children = SYSCTL_CHILDREN(oid); sc->sc_do_rxcopy = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW, &sc->sc_do_rxcopy, 1, "Do RX copy of small frames"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL, sc->params.nports, "# of ports"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, NULL, chip_rev(sc), "chip hardware revision"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version", CTLFLAG_RD, sc->tp_version, 0, "TP microcode version"); if (sc->params.exprom_vers != 0) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "exprom_version", CTLFLAG_RD, sc->exprom_version, 0, "expansion ROM version"); } SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", CTLFLAG_RD, sc->cfg_file, 0, "configuration file"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL, sc->cfcsum, "config file checksum"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells", CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells, sysctl_bitfield, "A", "available doorbells"); #define SYSCTL_CAP(name, n, text) \ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \ CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \ sysctl_bitfield, "A", "available " text "capabilities") SYSCTL_CAP(nbmcaps, 0, "NBM"); SYSCTL_CAP(linkcaps, 1, "link"); SYSCTL_CAP(switchcaps, 2, "switch"); SYSCTL_CAP(niccaps, 3, "NIC"); SYSCTL_CAP(toecaps, 4, "TCP offload"); SYSCTL_CAP(rdmacaps, 5, "RDMA"); SYSCTL_CAP(iscsicaps, 6, "iSCSI"); SYSCTL_CAP(tlscaps, 7, "TLS"); SYSCTL_CAP(fcoecaps, 8, "FCoE"); #undef SYSCTL_CAP SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL, sc->params.vpd.cclk, "core clock frequency (in KHz)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val, sizeof(sc->params.sge.timer_val), sysctl_int_array, "A", "interrupt holdoff timer values (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val, sizeof(sc->params.sge.counter_val), sysctl_int_array, "A", "interrupt holdoff packet counter values"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD, NULL, sc->tids.nftids, "number of filters"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, sc, 0, sysctl_temperature, "I", "chip temperature (in Celsius)"); t4_sge_sysctls(sc, ctx, children); sc->lro_timeout = 100; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW, &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "debug_flags", CTLFLAG_RW, &sc->debug_flags, 0, "flags to enable runtime debugging"); #ifdef SBUF_DRAIN /* * dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "logs and miscellaneous information"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cctrl, "A", "congestion control"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0", CTLTYPE_STRING | CTLFLAG_RD, sc, 3, sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1", CTLTYPE_STRING | CTLFLAG_RD, sc, 4, sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5, sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? sysctl_cim_la : sysctl_cim_la_t6, "A", "CIM logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2", CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3", CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge", CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)"); if (chip_id(sc) > CHELSIO_T4) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_qcfg, "A", "CIM queue configuration"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cpl_stats, "A", "CPL statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ddp_stats, "A", "non-TCP DDP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_devlog, "A", "firmware's device log"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_fcoe_stats, "A", "FCoE statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_hw_sched, "A", "hardware scheduler "); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_l2t, "A", "hardware L2 table"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_lb_stats, "A", "loopback statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_meminfo, "A", "memory regions"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6, "A", "MPS TCAM entries"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_path_mtus, "A", "path MTUs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_pm_stats, "A", "PM statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_rdma_stats, "A", "RDMA statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tcp_stats, "A", "TCP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tids, "A", "TID information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_err_stats, "A", "TP error statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask", CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I", "TP logic analyzer event capture mask"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_la, "A", "TP logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tx_rate, "A", "Tx rate"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ulprx_la, "A", "ULPRX logic analyzer"); if (is_t5(sc)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_wcwr_stats, "A", "write combined work requests"); } #endif #ifdef TCP_OFFLOAD if (is_offload(sc)) { /* * dev.t4nex.X.toe. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD, NULL, "TOE parameters"); children = SYSCTL_CHILDREN(oid); sc->tt.sndbuf = 256 * 1024; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW, &sc->tt.sndbuf, 0, "max hardware send buffer size"); sc->tt.ddp = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW, &sc->tt.ddp, 0, "DDP allowed"); sc->tt.rx_coalesce = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce", CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing"); sc->tt.tx_align = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align", CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A", "TP timer tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A", "TCP timestamp tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A", "DACK tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer", CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer, "IU", "DACK timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN, sysctl_tp_timer, "LU", "Retransmit min (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX, sysctl_tp_timer, "LU", "Retransmit max (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN, sysctl_tp_timer, "LU", "Persist timer min (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX, sysctl_tp_timer, "LU", "Persist timer max (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE, sysctl_tp_timer, "LU", "Keepidle idle timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_intvl", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL, sysctl_tp_timer, "LU", "Keepidle interval (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT, sysctl_tp_timer, "LU", "Initial SRTT (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER, sysctl_tp_timer, "LU", "FINWAIT2 timer (us)"); } #endif } void vi_sysctls(struct vi_info *vi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(vi->dev); /* * dev.v?(cxgbe|cxl).X. */ oid = device_get_sysctl_tree(vi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL, vi->viid, "VI identifer"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD, &vi->nrxq, 0, "# of rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD, &vi->ntxq, 0, "# of tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD, &vi->first_rxq, 0, "index of first rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, &vi->first_txq, 0, "index of first tx queue"); if (IS_MAIN_VI(vi)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU", "Reserve queue 0 for non-flowid packets"); } #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD, &vi->nofldrxq, 0, "# of rx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD, &vi->nofldtxq, 0, "# of tx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq", CTLFLAG_RD, &vi->first_ofld_rxq, 0, "index of first TOE rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq", CTLFLAG_RD, &vi->first_ofld_txq, 0, "index of first TOE tx queue"); } #endif #ifdef DEV_NETMAP if (vi->nnmrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD, &vi->nnmrxq, 0, "# of netmap rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD, &vi->nnmtxq, 0, "# of netmap tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq", CTLFLAG_RD, &vi->first_nm_rxq, 0, "index of first netmap rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq", CTLFLAG_RD, &vi->first_nm_txq, 0, "index of first netmap tx queue"); } #endif SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I", "rx queue size"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I", "tx queue size"); } static void cxgbe_sysctls(struct port_info *pi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *children2; struct adapter *sc = pi->adapter; int i; char name[16]; ctx = device_get_sysctl_ctx(pi->dev); /* * dev.cxgbe.X. */ oid = device_get_sysctl_tree(pi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", "PHY temperature (in Celsius)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", "PHY firmware version"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings", CTLTYPE_STRING | CTLFLAG_RW, pi, PAUSE_TX, sysctl_pause_settings, "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL, port_top_speed(pi), "max speed (in Gbps)"); /* * dev.(cxgbe|cxl).X.tc. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL, "Tx scheduler traffic classes"); for (i = 0; i < sc->chip_params->nsched_cls; i++) { struct tx_sched_class *tc = &pi->tc[i]; snprintf(name, sizeof(name), "%d", i); children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "traffic class")); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD, &tc->flags, 0, "flags"); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount", CTLFLAG_RD, &tc->refcount, 0, "references to this class"); #ifdef SBUF_DRAIN SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params", CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i, sysctl_tc_params, "A", "traffic class parameters"); #endif } /* * dev.cxgbe.X.stats. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, NULL, "port statistics"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD, &pi->tx_parse_error, 0, "# of tx packets with invalid length or # of segments"); #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \ SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \ CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \ sysctl_handle_t4_reg64, "QU", desc) SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_64", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L)); SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L)); SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err", "# of frames received with bad FCS", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_len_err", "# of frames received with length error", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_64", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L)); #undef SYSCTL_ADD_T4_REG64 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \ &pi->stats.name, desc) /* We get these from port_stats and they may be stale by up to 1s */ SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets"); #undef SYSCTL_ADD_T4_PORTSTAT } static int sysctl_int_array(SYSCTL_HANDLER_ARGS) { int rc, *i, space = 0; struct sbuf sb; sbuf_new_for_sysctl(&sb, NULL, 64, req); for (i = arg1; arg2; arg2 -= sizeof(int), i++) { if (space) sbuf_printf(&sb, " "); sbuf_printf(&sb, "%d", *i); space = 1; } rc = sbuf_finish(&sb); sbuf_delete(&sb); return (rc); } static int sysctl_bitfield(SYSCTL_HANDLER_ARGS) { int rc; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", (int)arg2, (char *)arg1); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_btphy(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; int op = arg2; struct adapter *sc = pi->adapter; u_int v; int rc; rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt"); if (rc) return (rc); /* XXX: magic numbers */ rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820, &v); end_synchronized_op(sc, 0); if (rc) return (rc); if (op == 0) v /= 256; rc = sysctl_handle_int(oidp, &v, 0, req); return (rc); } static int sysctl_noflowq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; int rc, val; val = vi->rsrv_noflowq; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((val >= 1) && (vi->ntxq > 1)) vi->rsrv_noflowq = 1; else vi->rsrv_noflowq = 0; return (rc); } static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif uint8_t v; idx = vi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1); for_each_rxq(vi, i, rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&rxq->iq.intr_params, v); #else rxq->iq.intr_params = v; #endif } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v); #else ofld_rxq->iq.intr_params = v; #endif } #endif vi->tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4pktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || (qsize & 7)) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4rxqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_rxq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || qsize > 65536) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4txqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_txq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1PAUSE_RX\2PAUSE_TX"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX)); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~(PAUSE_TX | PAUSE_RX)) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4PAUSE"); if (rc) return (rc); if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) { int link_ok = lc->link_ok; lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX); lc->requested_fc |= n; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); lc->link_ok = link_ok; /* restore */ } end_synchronized_op(sc, 0); } return (rc); } static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; uint64_t val; val = t4_read_reg64(sc, reg); return (sysctl_handle_64(oidp, &val, 0, req)); } static int sysctl_temperature(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int rc, t; uint32_t param, val; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp"); if (rc) return (rc); param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); end_synchronized_op(sc, 0); if (rc) return (rc); /* unknown is returned as 0 but we display -1 in that case */ t = val == 0 ? -1 : val; rc = sysctl_handle_int(oidp, &t, 0, req); return (rc); } #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t incr[NMTUS][NCCTRL_WIN]; static const char *dec_fac[] = { "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875", "0.9375" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); t4_read_cong_tbl(sc, incr); for (i = 0; i < NCCTRL_WIN; ++i) { sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i, incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i], incr[5][i], incr[6][i], incr[7][i]); sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n", incr[8][i], incr[9][i], incr[10][i], incr[11][i], incr[12][i], incr[13][i], incr[14][i], incr[15][i], sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = { "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI", /* ibq's */ "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */ "SGE0-RX", "SGE1-RX" /* additional obq's (T5 onwards) */ }; static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n, qid = arg2; uint32_t *buf, *p; char *qtype; u_int cim_num_obq = sc->chip_params->cim_num_obq; KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq, ("%s: bad qid %d\n", __func__, qid)); if (qid < CIM_NUM_IBQ) { /* inbound queue */ qtype = "IBQ"; n = 4 * CIM_IBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_ibq(sc, qid, buf, n); } else { /* outbound queue */ qtype = "OBQ"; qid -= CIM_NUM_IBQ; n = 4 * cim_num_obq * CIM_OBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_obq(sc, qid, buf, n); } if (rc < 0) { rc = -rc; goto done; } n = rc * sizeof(uint32_t); /* rc has # of words actually read */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]); for (i = 0, p = buf; i < n; i += 16, p += 4) sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], p[2], p[3]); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) <= CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? "" : " LS0Stat LS0Addr LS0Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x", p[5] & 0xff, p[6], p[7]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x", (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8, p[4] & 0xff, p[5] >> 8); sbuf_printf(sb, "\n %02x %x%07x %x%07x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4); } else { sbuf_printf(sb, "\n %02x %x%07x %x%07x %08x %08x " "%08x%08x%08x%08x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5], p[6], p[7]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) > CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Inst Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? "" : " LS0Stat LS0Addr LS0Data LS1Stat LS1Addr LS1Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x %08x", p[3] & 0xff, p[2], p[1], p[0]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x %02x%06x", (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8, p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8); sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16); } else { sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x " "%08x %08x %08x %08x %08x %08x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16, p[2], p[1], p[0], p[5], p[4], p[3]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE); p = buf; for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCnt ID Tag UE Data RDY VLD"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%3u %2u %x %u %08x%08x %u %u", (p[2] >> 10) & 0xff, (p[2] >> 7) & 7, (p[2] >> 3) & 0xf, (p[2] >> 2) & 1, (p[1] >> 2) | ((p[2] & 3) << 30), (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1, p[0] & 1); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL); p = buf; sbuf_printf(sb, "Cntl ID DataBE Addr Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %04x %08x %08x%08x%08x%08x", (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff, p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCntl ID Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %08x%08x%08x%08x", (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t thres[CIM_NUM_IBQ]; uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr; uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat; u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq; cim_num_obq = sc->chip_params->cim_num_obq; if (is_t4(sc)) { ibq_rdaddr = A_UP_IBQ_0_RDADDR; obq_rdaddr = A_UP_OBQ_0_REALADDR; } else { ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR; obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR; } nq = CIM_NUM_IBQ + cim_num_obq; rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat); if (rc == 0) rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr); if (rc != 0) return (rc); t4_read_cimq_cfg(sc, base, size, thres); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Queue Base Size Thres RdPtr WrPtr SOP EOP Avail"); for (i = 0; i < CIM_NUM_IBQ; i++, p += 4) sbuf_printf(sb, "\n%7s %5x %5u %5u %6x %4x %4u %4u %5u", qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]), G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); for ( ; i < nq; i++, p += 4, wr += 2) sbuf_printf(sb, "\n%7s %5x %5u %12x %4x %4u %4u %5u", qname[i], base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff, wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_cpl_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_cpl_stats(sc, &stats); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\nCPL requests: %10u %10u %10u %10u", stats.req[0], stats.req[1], stats.req[2], stats.req[3]); sbuf_printf(sb, "\nCPL responses: %10u %10u %10u %10u", stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\nCPL requests: %10u %10u", stats.req[0], stats.req[1]); sbuf_printf(sb, "\nCPL responses: %10u %10u", stats.rsp[0], stats.rsp[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_usm_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_usm_stats(sc, &stats); sbuf_printf(sb, "Frames: %u\n", stats.frames); sbuf_printf(sb, "Octets: %ju\n", stats.octets); sbuf_printf(sb, "Drops: %u", stats.drops); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char * const devlog_level_strings[] = { [FW_DEVLOG_LEVEL_EMERG] = "EMERG", [FW_DEVLOG_LEVEL_CRIT] = "CRIT", [FW_DEVLOG_LEVEL_ERR] = "ERR", [FW_DEVLOG_LEVEL_NOTICE] = "NOTICE", [FW_DEVLOG_LEVEL_INFO] = "INFO", [FW_DEVLOG_LEVEL_DEBUG] = "DEBUG" }; static const char * const devlog_facility_strings[] = { [FW_DEVLOG_FACILITY_CORE] = "CORE", [FW_DEVLOG_FACILITY_CF] = "CF", [FW_DEVLOG_FACILITY_SCHED] = "SCHED", [FW_DEVLOG_FACILITY_TIMER] = "TIMER", [FW_DEVLOG_FACILITY_RES] = "RES", [FW_DEVLOG_FACILITY_HW] = "HW", [FW_DEVLOG_FACILITY_FLR] = "FLR", [FW_DEVLOG_FACILITY_DMAQ] = "DMAQ", [FW_DEVLOG_FACILITY_PHY] = "PHY", [FW_DEVLOG_FACILITY_MAC] = "MAC", [FW_DEVLOG_FACILITY_PORT] = "PORT", [FW_DEVLOG_FACILITY_VI] = "VI", [FW_DEVLOG_FACILITY_FILTER] = "FILTER", [FW_DEVLOG_FACILITY_ACL] = "ACL", [FW_DEVLOG_FACILITY_TM] = "TM", [FW_DEVLOG_FACILITY_QFC] = "QFC", [FW_DEVLOG_FACILITY_DCB] = "DCB", [FW_DEVLOG_FACILITY_ETH] = "ETH", [FW_DEVLOG_FACILITY_OFLD] = "OFLD", [FW_DEVLOG_FACILITY_RI] = "RI", [FW_DEVLOG_FACILITY_ISCSI] = "ISCSI", [FW_DEVLOG_FACILITY_FCOE] = "FCOE", [FW_DEVLOG_FACILITY_FOISCSI] = "FOISCSI", [FW_DEVLOG_FACILITY_FOFCOE] = "FOFCOE", [FW_DEVLOG_FACILITY_CHNET] = "CHNET", }; static int sysctl_devlog(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e *buf, *e; int i, j, rc, nentries, first = 0; struct sbuf *sb; uint64_t ftstamp = UINT64_MAX; if (dparams->addr == 0) return (ENXIO); buf = malloc(dparams->size, M_CXGBE, M_NOWAIT); if (buf == NULL) return (ENOMEM); rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size); if (rc != 0) goto done; nentries = dparams->size / sizeof(struct fw_devlog_e); for (i = 0; i < nentries; i++) { e = &buf[i]; if (e->timestamp == 0) break; /* end */ e->timestamp = be64toh(e->timestamp); e->seqno = be32toh(e->seqno); for (j = 0; j < 8; j++) e->params[j] = be32toh(e->params[j]); if (e->timestamp < ftstamp) { ftstamp = e->timestamp; first = i; } } if (buf[first].timestamp == 0) goto done; /* nothing in the log */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%10s %15s %8s %8s %s\n", "Seq#", "Tstamp", "Level", "Facility", "Message"); i = first; do { e = &buf[i]; if (e->timestamp == 0) break; /* end */ sbuf_printf(sb, "%10d %15ju %8s %8s ", e->seqno, e->timestamp, (e->level < nitems(devlog_level_strings) ? devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); sbuf_printf(sb, e->fmt, e->params[0], e->params[1], e->params[2], e->params[3], e->params[4], e->params[5], e->params[6], e->params[7]); if (++i == nentries) i = 0; } while (i != first); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_fcoe_stats stats[MAX_NCHAN]; int i, nchan = sc->chip_params->nchan; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nchan; i++) t4_get_fcoe_stats(sc, i, &stats[i]); if (nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp, stats[2].octets_ddp, stats[3].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp, stats[2].frames_ddp, stats[3].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u", stats[0].frames_drop, stats[1].frames_drop, stats[2].frames_drop, stats[3].frames_drop); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u", stats[0].frames_drop, stats[1].frames_drop); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; unsigned int map, kbps, ipg, mode; unsigned int pace_tab[NTX_SCHED]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); t4_read_pace_tbl(sc, pace_tab); sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) " "Class IPG (0.1 ns) Flow IPG (us)"); for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { t4_get_tx_sched(sc, i, &kbps, &ipg); sbuf_printf(sb, "\n %u %-5s %u ", i, (mode & (1 << i)) ? "flow" : "class", map & 3); if (kbps) sbuf_printf(sb, "%9u ", kbps); else sbuf_printf(sb, " disabled "); if (ipg) sbuf_printf(sb, "%13u ", ipg); else sbuf_printf(sb, " disabled "); if (pace_tab[i]) sbuf_printf(sb, "%10u", pace_tab[i]); else sbuf_printf(sb, " disabled"); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, j; uint64_t *p0, *p1; struct lb_port_stats s[2]; static const char *stat_name[] = { "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:", "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:", "Frames128To255:", "Frames256To511:", "Frames512To1023:", "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:", "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:", "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:", "BG2FramesTrunc:", "BG3FramesTrunc:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); memset(s, 0, sizeof(s)); for (i = 0; i < sc->chip_params->nchan; i += 2) { t4_get_lb_stats(sc, i, &s[0]); t4_get_lb_stats(sc, i + 1, &s[1]); p0 = &s[0].octets; p1 = &s[1].octets; sbuf_printf(sb, "%s Loopback %u" " Loopback %u", i == 0 ? "" : "\n", i, i + 1); for (j = 0; j < nitems(stat_name); j++) sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j], *p0++, *p1++); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS) { int rc = 0; struct port_info *pi = arg1; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 64, req); if (sb == NULL) return (ENOMEM); if (pi->linkdnrc < 0) sbuf_printf(sb, "n/a"); else sbuf_printf(sb, "%s", t4_link_down_rc_str(pi->linkdnrc)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } struct mem_desc { unsigned int base; unsigned int limit; unsigned int idx; }; static int mem_desc_cmp(const void *a, const void *b) { return ((const struct mem_desc *)a)->base - ((const struct mem_desc *)b)->base; } static void mem_region_show(struct sbuf *sb, const char *name, unsigned int from, unsigned int to) { unsigned int size; if (from == to) return; size = to - from + 1; if (size == 0) return; /* XXX: need humanize_number(3) in libkern for a more readable 'size' */ sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size); } static int sysctl_meminfo(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n; uint32_t lo, hi, used, alloc; static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"}; static const char *region[] = { "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:", "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:", "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:", "TDDP region:", "TPT region:", "STAG region:", "RQ region:", "RQUDP region:", "PBL region:", "TXPBL region:", "DBVFIFO region:", "ULPRX state:", "ULPTX state:", "On-chip queues:" }; struct mem_desc avail[4]; struct mem_desc mem[nitems(region) + 3]; /* up to 3 holes */ struct mem_desc *md = mem; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nitems(mem); i++) { mem[i].limit = 0; mem[i].idx = i; } /* Find and sort the populated memory ranges */ i = 0; lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if (lo & F_EDRAM0_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); avail[i].base = G_EDRAM0_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20); avail[i].idx = 0; i++; } if (lo & F_EDRAM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); avail[i].base = G_EDRAM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20); avail[i].idx = 1; i++; } if (lo & F_EXT_MEM_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); avail[i].base = G_EXT_MEM_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20); avail[i].idx = is_t5(sc) ? 3 : 2; /* Call it MC0 for T5 */ i++; } if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); avail[i].base = G_EXT_MEM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM1_SIZE(hi) << 20); avail[i].idx = 4; i++; } if (!i) /* no memory available */ return 0; qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp); (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR); (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE); /* the next few have explicit upper bounds */ md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) * G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE)); md++; md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) * G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE)); md++; if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { if (chip_id(sc) <= CHELSIO_T5) md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE); else md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR); md->limit = 0; } else { md->base = 0; md->idx = nitems(region); /* hide it */ } md++; #define ulp_region(reg) \ md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\ (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT) ulp_region(RX_ISCSI); ulp_region(RX_TDDP); ulp_region(TX_TPT); ulp_region(RX_STAG); ulp_region(RX_RQ); ulp_region(RX_RQUDP); ulp_region(RX_PBL); ulp_region(TX_PBL); #undef ulp_region md->base = 0; md->idx = nitems(region); if (!is_t4(sc)) { uint32_t size = 0; uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2); uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE); if (is_t5(sc)) { if (sge_ctrl & F_VFIFO_ENABLE) size = G_DBVFIFO_SIZE(fifo_size); } else size = G_T6_DBVFIFO_SIZE(fifo_size); if (size) { md->base = G_BASEADDR(t4_read_reg(sc, A_SGE_DBVFIFO_BADDR)); md->limit = md->base + (size << 2) - 1; } } md++; md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE); md->limit = 0; md++; md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE); md->limit = 0; md++; md->base = sc->vres.ocq.start; if (sc->vres.ocq.size) md->limit = md->base + sc->vres.ocq.size - 1; else md->idx = nitems(region); /* hide it */ md++; /* add any address-space holes, there can be up to 3 */ for (n = 0; n < i - 1; n++) if (avail[n].limit < avail[n + 1].base) (md++)->base = avail[n].limit; if (avail[n].limit) (md++)->base = avail[n].limit; n = md - mem; qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp); for (lo = 0; lo < i; lo++) mem_region_show(sb, memory[avail[lo].idx], avail[lo].base, avail[lo].limit - 1); sbuf_printf(sb, "\n"); for (i = 0; i < n; i++) { if (mem[i].idx >= nitems(region)) continue; /* skip holes */ if (!mem[i].limit) mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0; mem_region_show(sb, region[mem[i].idx], mem[i].base, mem[i].limit); } sbuf_printf(sb, "\n"); lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP RAM:", lo, hi); lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP Extmem2:", lo, hi); lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE); sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n", G_PMRXMAXPAGE(lo), t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10, (lo & F_PMRXNUMCHN) ? 2 : 1); lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE); hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n", G_PMTXMAXPAGE(lo), hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo)); sbuf_printf(sb, "%u p-structs\n", t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT)); for (i = 0; i < 4; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated", i, used, alloc); } for (i = 0; i < sc->chip_params->nchan; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nLoopback %d using %u pages out of %u allocated", i, used, alloc); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static inline void tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask) { *mask = x | y; y = htobe64(y); memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN); } static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) <= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask Vld Ports PF" " VF Replication P0 P1 P2 P3 ML"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint64_t tcamx, tcamy, mask; uint32_t cls_lo, cls_hi; uint8_t addr[ETHER_ADDR_LEN]; tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i)); tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i)); if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx" " %c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, (cls_lo & F_SRAM_VLD) ? 'Y' : 'N', G_PORTMAP(cls_hi), G_PF(cls_lo), (cls_lo & F_VF_VALID) ? G_VF(cls_lo) : -1); if (cls_lo & F_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%36d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%36s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo), G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo), G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) > CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask VNI Mask" " IVLAN Vld DIP_Hit Lookup Port Vld Ports PF VF" " Replication" " P0 P1 P2 P3 ML\n"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint8_t dip_hit, vlan_vld, lookup_type, port_num; uint16_t ivlan; uint64_t tcamx, tcamy, val, mask; uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy; uint8_t addr[ETHER_ADDR_LEN]; ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0); if (i < 256) ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0); else ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamy = G_DMACH(val) << 32; tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); lookup_type = G_DATALKPTYPE(data2); port_num = G_DATAPORTNUM(data2); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI */ vniy = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); dip_hit = data2 & F_DATADIPHIT; vlan_vld = 0; } else { vniy = 0; dip_hit = 0; vlan_vld = data2 & F_DATAVIDH2; ivlan = G_VIDL(val); } ctl |= V_CTLXYBITSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamx = G_DMACH(val) << 32; tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI mask */ vnix = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); } else vnix = 0; if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); if (lookup_type && lookup_type != M_DATALKPTYPE) { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx %06x %06x - - %3c" " 'I' %4x %3c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N', port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } else { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx - - ", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask); if (vlan_vld) sbuf_printf(sb, "%4u Y ", ivlan); else sbuf_printf(sb, " - N "); sbuf_printf(sb, "- %3c %4x %3c %#x%4u%4d", lookup_type ? 'I' : 'O', port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } if (cls_lo & F_T6_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t6mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%72d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x" " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc255_224), be32toh(ldst_cmd.u.mps.rplc.rplc223_192), be32toh(ldst_cmd.u.mps.rplc.rplc191_160), be32toh(ldst_cmd.u.mps.rplc.rplc159_128), be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%72s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#x", G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo), G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo), (cls_lo >> S_T6_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; uint16_t mtus[NMTUS]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_read_mtu_tbl(sc, mtus, NULL); sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6], mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13], mtus[14], mtus[15]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS]; uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS]; static const char *tx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Bypass + mem:", "Tx FIFO wait", NULL, "Tx latency" }; static const char *rx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Flush:", " Rx FIFO wait", NULL, "Rx latency" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_pmtx_get_stats(sc, tx_cnt, tx_cyc); t4_pmrx_get_stats(sc, rx_cnt, rx_cyc); sbuf_printf(sb, " Tx pcmds Tx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); } sbuf_printf(sb, "\n Rx pcmds Rx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } if (chip_id(sc) > CHELSIO_T5) { sbuf_printf(sb, "\n Total wait Total occupancy"); sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); i += 2; MPASS(i < nitems(tx_stats)); sbuf_printf(sb, "\n Reads Total wait"); sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_rdma_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_rdma_stats(sc, &stats); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_tcp_stats v4, v6; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_tcp_stats(sc, &v4, &v6); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, " IP IPv6\n"); sbuf_printf(sb, "OutRsts: %20u %20u\n", v4.tcp_out_rsts, v6.tcp_out_rsts); sbuf_printf(sb, "InSegs: %20ju %20ju\n", v4.tcp_in_segs, v6.tcp_in_segs); sbuf_printf(sb, "OutSegs: %20ju %20ju\n", v4.tcp_out_segs, v6.tcp_out_segs); sbuf_printf(sb, "RetransSegs: %20ju %20ju", v4.tcp_retrans_segs, v6.tcp_retrans_segs); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tids(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tid_info *t = &sc->tids; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); if (t->natids) { sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1, t->atids_in_use); } if (t->ntids) { if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { uint32_t b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4; if (b) { sbuf_printf(sb, "TID range: 0-%u, %u-%u", b - 1, t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, t->ntids - 1); } else { sbuf_printf(sb, "TID range: %u-%u", t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4, t->ntids - 1); } } else sbuf_printf(sb, "TID range: 0-%u", t->ntids - 1); sbuf_printf(sb, ", in use: %u\n", atomic_load_acq_int(&t->tids_in_use)); } if (t->nstids) { sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base, t->stid_base + t->nstids - 1, t->stids_in_use); } if (t->nftids) { sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base, t->ftid_base + t->nftids - 1); } if (t->netids) { sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base, t->etid_base + t->netids - 1); } sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4), t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_err_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_err_stats(sc, &stats); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1], stats.mac_in_errs[2], stats.mac_in_errs[3]); sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1], stats.hdr_in_errs[2], stats.hdr_in_errs[3]); sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1], stats.tcp_in_errs[2], stats.tcp_in_errs[3]); sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1], stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]); sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1], stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]); sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1], stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1], stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]); sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1], stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "macInErrs: %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1]); sbuf_printf(sb, "hdrInErrs: %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1]); sbuf_printf(sb, "tcpInErrs: %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1]); sbuf_printf(sb, "tcp6InErrs: %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]); sbuf_printf(sb, "tnlCongDrops: %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]); sbuf_printf(sb, "tnlTxDrops: %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]); sbuf_printf(sb, "ofldChanDrops: %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]); } sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", stats.ofld_no_neigh, stats.ofld_cong_defer); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tp_params *tpp = &sc->params.tp; u_int mask; int rc; mask = tpp->la_mask >> 16; rc = sysctl_handle_int(oidp, &mask, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (mask > 0xffff) return (EINVAL); tpp->la_mask = mask << 16; t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask); return (0); } struct field_desc { const char *name; u_int start; u_int width; }; static void field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f) { char buf[32]; int line_size = 0; while (f->name) { uint64_t mask = (1ULL << f->width) - 1; int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name, ((uintmax_t)v >> f->start) & mask); if (line_size + len >= 79) { line_size = 8; sbuf_printf(sb, "\n "); } sbuf_printf(sb, "%s ", buf); line_size += len + 1; f++; } sbuf_printf(sb, "\n"); } static const struct field_desc tp_la0[] = { { "RcfOpCodeOut", 60, 4 }, { "State", 56, 4 }, { "WcfState", 52, 4 }, { "RcfOpcSrcOut", 50, 2 }, { "CRxError", 49, 1 }, { "ERxError", 48, 1 }, { "SanityFailed", 47, 1 }, { "SpuriousMsg", 46, 1 }, { "FlushInputMsg", 45, 1 }, { "FlushInputCpl", 44, 1 }, { "RssUpBit", 43, 1 }, { "RssFilterHit", 42, 1 }, { "Tid", 32, 10 }, { "InitTcb", 31, 1 }, { "LineNumber", 24, 7 }, { "Emsg", 23, 1 }, { "EdataOut", 22, 1 }, { "Cmsg", 21, 1 }, { "CdataOut", 20, 1 }, { "EreadPdu", 19, 1 }, { "CreadPdu", 18, 1 }, { "TunnelPkt", 17, 1 }, { "RcfPeerFin", 16, 1 }, { "RcfReasonOut", 12, 4 }, { "TxCchannel", 10, 2 }, { "RcfTxChannel", 8, 2 }, { "RxEchannel", 6, 2 }, { "RcfRxChannel", 5, 1 }, { "RcfDataOutSrdy", 4, 1 }, { "RxDvld", 3, 1 }, { "RxOoDvld", 2, 1 }, { "RxCongestion", 1, 1 }, { "TxCongestion", 0, 1 }, { NULL } }; static const struct field_desc tp_la1[] = { { "CplCmdIn", 56, 8 }, { "CplCmdOut", 48, 8 }, { "ESynOut", 47, 1 }, { "EAckOut", 46, 1 }, { "EFinOut", 45, 1 }, { "ERstOut", 44, 1 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static const struct field_desc tp_la2[] = { { "CplCmdIn", 56, 8 }, { "MpsVfVld", 55, 1 }, { "MpsPf", 52, 3 }, { "MpsVf", 44, 8 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static void tp_la_show(struct sbuf *sb, uint64_t *p, int idx) { field_desc_show(sb, *p, tp_la0); } static void tp_la_show2(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], tp_la0); } static void tp_la_show3(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? tp_la2 : tp_la1); } static int sysctl_tp_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint64_t *buf, *p; int rc; u_int i, inc; void (*show_func)(struct sbuf *, uint64_t *, int); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK); t4_tp_read_la(sc, buf, NULL); p = buf; switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) { case 2: inc = 2; show_func = tp_la_show2; break; case 3: inc = 2; show_func = tp_la_show3; break; default: inc = 1; show_func = tp_la_show; } for (i = 0; i < TPLA_SIZE / inc; i++, p += inc) (*show_func)(sb, p, i); rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; u64 nrate[MAX_NCHAN], orate[MAX_NCHAN]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_chan_txrate(sc, nrate, orate); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n", nrate[0], nrate[1], nrate[2], nrate[3]); sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju", orate[0], orate[1], orate[2], orate[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju\n", nrate[0], nrate[1]); sbuf_printf(sb, "Offload B/s: %10ju %10ju", orate[0], orate[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint32_t *buf, *p; int rc, i; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_ulprx_read_la(sc, buf); p = buf; sbuf_printf(sb, " Pcmd Type Message" " Data"); for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) { sbuf_printf(sb, "\n%08x%08x %4x %08x %08x%08x%08x%08x", p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, v; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); v = t4_read_reg(sc, A_SGE_STAT_CFG); if (G_STATSOURCE_T5(v) == 7) { if (G_STATMODE(v) == 0) { sbuf_printf(sb, "total %d, incomplete %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else if (G_STATMODE(v) == 1) { sbuf_printf(sb, "total %d, data overflow %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tc_params(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tx_sched_class *tc; struct t4_sched_class_params p; struct sbuf *sb; int i, rc, port_id, flags, mbps, gbps; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); port_id = arg2 >> 16; MPASS(port_id < sc->params.nports); MPASS(sc->port[port_id] != NULL); i = arg2 & 0xffff; MPASS(i < sc->chip_params->nsched_cls); tc = &sc->port[port_id]->tc[i]; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tc_p"); if (rc) goto done; flags = tc->flags; p = tc->params; end_synchronized_op(sc, LOCK_HELD); if ((flags & TX_SC_OK) == 0) { sbuf_printf(sb, "none"); goto done; } if (p.level == SCHED_CLASS_LEVEL_CL_WRR) { sbuf_printf(sb, "cl-wrr weight %u", p.weight); goto done; } else if (p.level == SCHED_CLASS_LEVEL_CL_RL) sbuf_printf(sb, "cl-rl"); else if (p.level == SCHED_CLASS_LEVEL_CH_RL) sbuf_printf(sb, "ch-rl"); else { rc = ENXIO; goto done; } if (p.ratemode == SCHED_CLASS_RATEMODE_REL) { /* XXX: top speed or actual link speed? */ gbps = port_top_speed(sc->port[port_id]); sbuf_printf(sb, " %u%% of %uGbps", p.maxrate, gbps); } else if (p.ratemode == SCHED_CLASS_RATEMODE_ABS) { switch (p.rateunit) { case SCHED_CLASS_RATEUNIT_BITS: mbps = p.maxrate / 1000; gbps = p.maxrate / 1000000; if (p.maxrate == gbps * 1000000) sbuf_printf(sb, " %uGbps", gbps); else if (p.maxrate == mbps * 1000) sbuf_printf(sb, " %uMbps", mbps); else sbuf_printf(sb, " %uKbps", p.maxrate); break; case SCHED_CLASS_RATEUNIT_PKTS: sbuf_printf(sb, " %upps", p.maxrate); break; default: rc = ENXIO; goto done; } } switch (p.mode) { case SCHED_CLASS_MODE_CLASS: sbuf_printf(sb, " aggregate"); break; case SCHED_CLASS_MODE_FLOW: sbuf_printf(sb, " per-flow"); break; default: rc = ENXIO; goto done; } done: if (rc == 0) rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } #endif #ifdef TCP_OFFLOAD static void unit_conv(char *buf, size_t len, u_int val, u_int factor) { u_int rem = val % factor; if (rem == 0) snprintf(buf, len, "%u", val / factor); else { while (rem % 10 == 0) rem /= 10; snprintf(buf, len, "%u.%u", val / factor, rem); } } static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; char buf[16]; u_int res, re; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); switch (arg2) { case 0: /* timer_tick */ re = G_TIMERRESOLUTION(res); break; case 1: /* TCP timestamp tick */ re = G_TIMESTAMPRESOLUTION(res); break; case 2: /* DACK tick */ re = G_DELAYEDACKRESOLUTION(res); break; default: return (EDOOFUS); } unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000); return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int res, dack_re, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); dack_re = G_DELAYEDACKRESOLUTION(res); v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER); return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; u_int tre; u_long tp_tick_us, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX || reg == A_TP_PERS_MIN || reg == A_TP_PERS_MAX || reg == A_TP_KEEP_IDLE || A_TP_KEEP_INTVL || reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER); tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION)); tp_tick_us = (cclk_ps << tre) / 1000000; if (reg == A_TP_INIT_SRTT) v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg)); else v = tp_tick_us * t4_read_reg(sc, reg); return (sysctl_handle_long(oidp, &v, 0, req)); } #endif static uint32_t fconf_iconf_to_mode(uint32_t fconf, uint32_t iconf) { uint32_t mode; mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (fconf & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (fconf & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if (fconf & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (fconf & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (fconf & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (fconf & F_TOS) mode |= T4_FILTER_IP_TOS; if (fconf & F_VLAN) mode |= T4_FILTER_VLAN; if (fconf & F_VNIC_ID) { mode |= T4_FILTER_VNIC; if (iconf & F_VNIC) mode |= T4_FILTER_IC_VNIC; } if (fconf & F_PORT) mode |= T4_FILTER_PORT; if (fconf & F_FCOE) mode |= T4_FILTER_FCoE; return (mode); } static uint32_t mode_to_fconf(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; if (mode & T4_FILTER_PORT) fconf |= F_PORT; if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } static uint32_t mode_to_iconf(uint32_t mode) { if (mode & T4_FILTER_IC_VNIC) return (F_VNIC); return (0); } static int check_fspec_against_fconf_iconf(struct adapter *sc, struct t4_filter_specification *fs) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf = 0; if (fs->val.frag || fs->mask.frag) fconf |= F_FRAGMENTATION; if (fs->val.matchtype || fs->mask.matchtype) fconf |= F_MPSHITTYPE; if (fs->val.macidx || fs->mask.macidx) fconf |= F_MACMATCH; if (fs->val.ethtype || fs->mask.ethtype) fconf |= F_ETHERTYPE; if (fs->val.proto || fs->mask.proto) fconf |= F_PROTOCOL; if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; if (fs->val.vlan_vld || fs->mask.vlan_vld) fconf |= F_VLAN; if (fs->val.ovlan_vld || fs->mask.ovlan_vld) { fconf |= F_VNIC_ID; if (tpp->ingress_config & F_VNIC) return (EINVAL); } if (fs->val.pfvf_vld || fs->mask.pfvf_vld) { fconf |= F_VNIC_ID; if ((tpp->ingress_config & F_VNIC) == 0) return (EINVAL); } if (fs->val.iport || fs->mask.iport) fconf |= F_PORT; if (fs->val.fcoe || fs->mask.fcoe) fconf |= F_FCOE; if ((tpp->vlan_pri_map | fconf) != tpp->vlan_pri_map) return (E2BIG); return (0); } static int get_filter_mode(struct adapter *sc, uint32_t *mode) { struct tp_params *tpp = &sc->params.tp; /* * We trust the cached values of the relevant TP registers. This means * things work reliably only if writes to those registers are always via * t4_set_filter_mode. */ *mode = fconf_iconf_to_mode(tpp->vlan_pri_map, tpp->ingress_config); return (0); } static int set_filter_mode(struct adapter *sc, uint32_t mode) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf, iconf; int rc; iconf = mode_to_iconf(mode); if ((iconf ^ tpp->ingress_config) & F_VNIC) { /* * For now we just complain if A_TP_INGRESS_CONFIG is not * already set to the correct value for the requested filter * mode. It's not clear if it's safe to write to this register * on the fly. (And we trust the cached value of the register). */ return (EBUSY); } fconf = mode_to_fconf(mode); rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4setfm"); if (rc) return (rc); if (sc->tids.ftids_in_use > 0) { rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif rc = -t4_set_filter_mode(sc, fconf); done: end_synchronized_op(sc, LOCK_HELD); return (rc); } static inline uint64_t get_filter_hits(struct adapter *sc, uint32_t fid) { uint32_t tcb_addr; tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + (fid + sc->tids.ftid_base) * TCB_SIZE; if (is_t4(sc)) { uint64_t hits; read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&hits, 8); return (be64toh(hits)); } else { uint32_t hits; read_via_memwin(sc, 0, tcb_addr + 24, &hits, 4); return (be32toh(hits)); } } static int get_filter(struct adapter *sc, struct t4_filter *t) { int i, rc, nfilters = sc->tids.nftids; struct filter_entry *f; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4getf"); if (rc) return (rc); if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; goto done; } f = &sc->tids.ftid_tab[t->idx]; for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { t->idx = i; t->l2tidx = f->l2t ? f->l2t->idx : 0; t->smtidx = f->smtidx; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, t->idx); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: end_synchronized_op(sc, LOCK_HELD); return (0); } static int set_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters, nports; struct filter_entry *f; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); if (rc) return (rc); nfilters = sc->tids.nftids; nports = sc->params.nports; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } if (t->idx >= nfilters) { rc = EINVAL; goto done; } /* Validate against the global filter mode and ingress config */ rc = check_fspec_against_fconf_iconf(sc, &t->fs); if (rc != 0) goto done; if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) { rc = EINVAL; goto done; } if (t->fs.val.iport >= nports) { rc = EINVAL; goto done; } /* Can't specify an iq if not steering to it */ if (!t->fs.dirsteer && t->fs.iq) { rc = EINVAL; goto done; } /* IPv6 filter idx must be 4 aligned */ if (t->fs.type == 1 && ((t->idx & 0x3) || t->idx + 4 >= nfilters)) { rc = EINVAL; goto done; } if (sc->tids.ftid_tab == NULL) { KASSERT(sc->tids.ftids_in_use == 0, ("%s: no memory allocated but filters_in_use > 0", __func__)); sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) * nfilters, M_CXGBE, M_NOWAIT | M_ZERO); if (sc->tids.ftid_tab == NULL) { rc = ENOMEM; goto done; } mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF); } for (i = 0; i < 4; i++) { f = &sc->tids.ftid_tab[t->idx + i]; if (f->pending || f->valid) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (t->fs.type == 0) break; } f = &sc->tids.ftid_tab[t->idx]; f->fs = t->fs; rc = set_filter_wr(sc, t->idx); done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? 0 : EIO; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4setfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static int del_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters; struct filter_entry *f; int rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf"); if (rc) return (rc); nfilters = sc->tids.nftids; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 || t->idx >= nfilters) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } f = &sc->tids.ftid_tab[t->idx]; if (f->pending) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (f->valid) { t->fs = f->fs; /* extra info for the caller */ rc = del_filter_wr(sc, t->idx); } done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? EIO : 0; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4delfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static void clear_filter(struct filter_entry *f) { if (f->l2t) t4_l2t_release(f->l2t); bzero(f, sizeof (*f)); } static int set_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid, vnic_vld, vnic_vld_mask; struct wrq_cookie cookie; ASSERT_SYNCHRONIZED_OP(sc); if (f->fs.newdmac || f->fs.newvlan) { /* This filter needs an L2T entry; allocate one. */ f->l2t = t4_l2t_alloc_switching(sc->l2t); if (f->l2t == NULL) return (EAGAIN); if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport, f->fs.dmac)) { t4_l2t_release(f->l2t); f->l2t = NULL; return (ENOMEM); } } /* Already validated against fconf, iconf */ MPASS((f->fs.val.pfvf_vld & f->fs.val.ovlan_vld) == 0); MPASS((f->fs.mask.pfvf_vld & f->fs.mask.ovlan_vld) == 0); if (f->fs.val.pfvf_vld || f->fs.val.ovlan_vld) vnic_vld = 1; else vnic_vld = 0; if (f->fs.mask.pfvf_vld || f->fs.mask.ovlan_vld) vnic_vld_mask = 1; else vnic_vld_mask = 0; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof(*fwr)); fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = htobe32(V_FW_FILTER_WR_TID(ftid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.vlan); fwr->ivlanm = htobe16(f->fs.mask.vlan); fwr->ovlan = htobe16(f->fs.val.vnic); fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); if (f->fs.newsmac) bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma)); f->pending = 1; sc->tids.ftids_in_use++; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } static int del_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid; struct wrq_cookie cookie; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof (*fwr)); t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); f->pending = 1; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } int t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); unsigned int idx = GET_TID(rpl); unsigned int rc; struct filter_entry *f; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); if (is_ftid(sc, idx)) { idx -= sc->tids.ftid_base; f = &sc->tids.ftid_tab[idx]; rc = G_COOKIE(rpl->cookie); mtx_lock(&sc->tids.ftid_lock); if (rc == FW_FILTER_WR_FLT_ADDED) { KASSERT(f->pending, ("%s: filter[%u] isn't pending.", __func__, idx)); f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; } else { if (rc != FW_FILTER_WR_FLT_DELETED) { /* Add or delete failed, display an error */ log(LOG_ERR, "filter %u setup failed with error %u\n", idx, rc); } clear_filter(f); sc->tids.ftids_in_use--; } wakeup(&sc->tids.ftid_tab); mtx_unlock(&sc->tids.ftid_lock); } return (0); } static int get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt) { int rc; if (cntxt->cid > M_CTXTQID) return (EINVAL); if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt"); if (rc) return (rc); if (sc->flags & FW_OK) { rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); if (rc == 0) goto done; } /* * Read via firmware failed or wasn't even attempted. Read directly via * the backdoor. */ rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); done: end_synchronized_op(sc, 0); return (rc); } static int load_fw(struct adapter *sc, struct t4_data *fw) { int rc; uint8_t *fw_data; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw"); if (rc) return (rc); if (sc->flags & FULL_INIT_DONE) { rc = EBUSY; goto done; } fw_data = malloc(fw->len, M_CXGBE, M_WAITOK); if (fw_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(fw->data, fw_data, fw->len); if (rc == 0) rc = -t4_load_fw(sc, fw_data, fw->len); free(fw_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } #define MAX_READ_BUF_SIZE (128 * 1024) static int read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr) { uint32_t addr, remaining, n; uint32_t *buf; int rc; uint8_t *dst; rc = validate_mem_range(sc, mr->addr, mr->len); if (rc != 0) return (rc); buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK); addr = mr->addr; remaining = mr->len; dst = (void *)mr->data; while (remaining) { n = min(remaining, MAX_READ_BUF_SIZE); read_via_memwin(sc, 2, addr, buf, n); rc = copyout(buf, dst, n); if (rc != 0) break; dst += n; remaining -= n; addr += n; } free(buf, M_CXGBE); return (rc); } #undef MAX_READ_BUF_SIZE static int read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) { int rc; if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports) return (EINVAL); if (i2cd->len > sizeof(i2cd->data)) return (EFBIG); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr, i2cd->offset, i2cd->len, &i2cd->data[0]); end_synchronized_op(sc, 0); return (rc); } static int in_range(int val, int lo, int hi) { return (val < 0 || (val <= hi && val >= lo)); } static int set_sched_class_config(struct adapter *sc, int minmax) { int rc; if (minmax < 0) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc"); if (rc) return (rc); rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1); end_synchronized_op(sc, 0); return (rc); } static int set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p, int sleep_ok) { int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode; struct port_info *pi; struct tx_sched_class *tc; if (p->level == SCHED_CLASS_LEVEL_CL_RL) fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL; else if (p->level == SCHED_CLASS_LEVEL_CL_WRR) fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR; else if (p->level == SCHED_CLASS_LEVEL_CH_RL) fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL; else return (EINVAL); if (p->mode == SCHED_CLASS_MODE_CLASS) fw_mode = FW_SCHED_PARAMS_MODE_CLASS; else if (p->mode == SCHED_CLASS_MODE_FLOW) fw_mode = FW_SCHED_PARAMS_MODE_FLOW; else return (EINVAL); if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE; else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE; else return (EINVAL); if (p->ratemode == SCHED_CLASS_RATEMODE_REL) fw_ratemode = FW_SCHED_PARAMS_RATE_REL; else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) fw_ratemode = FW_SCHED_PARAMS_RATE_ABS; else return (EINVAL); /* Vet our parameters ... */ if (!in_range(p->channel, 0, sc->chip_params->nchan - 1)) return (ERANGE); pi = sc->port[sc->chan_map[p->channel]]; if (pi == NULL) return (ENXIO); MPASS(pi->tx_chan == p->channel); top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */ if (!in_range(p->cl, 0, sc->chip_params->nsched_cls) || !in_range(p->minrate, 0, top_speed) || !in_range(p->maxrate, 0, top_speed) || !in_range(p->weight, 0, 100)) return (ERANGE); /* * Translate any unset parameters into the firmware's * nomenclature and/or fail the call if the parameters * are required ... */ if (p->rateunit < 0 || p->ratemode < 0 || p->channel < 0 || p->cl < 0) return (EINVAL); if (p->minrate < 0) p->minrate = 0; if (p->maxrate < 0) { if (p->level == SCHED_CLASS_LEVEL_CL_RL || p->level == SCHED_CLASS_LEVEL_CH_RL) return (EINVAL); else p->maxrate = 0; } if (p->weight < 0) { if (p->level == SCHED_CLASS_LEVEL_CL_WRR) return (EINVAL); else p->weight = 0; } if (p->pktsize < 0) { if (p->level == SCHED_CLASS_LEVEL_CL_RL || p->level == SCHED_CLASS_LEVEL_CH_RL) return (EINVAL); else p->pktsize = 0; } rc = begin_synchronized_op(sc, NULL, sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp"); if (rc) return (rc); tc = &pi->tc[p->cl]; tc->params = *p; rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, fw_mode, fw_rateunit, fw_ratemode, p->channel, p->cl, p->minrate, p->maxrate, p->weight, p->pktsize, sleep_ok); if (rc == 0) tc->flags |= TX_SC_OK; else { /* * Unknown state at this point, see tc->params for what was * attempted. */ tc->flags &= ~TX_SC_OK; } end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD); return (rc); } static int set_sched_class(struct adapter *sc, struct t4_sched_params *p) { if (p->type != SCHED_CLASS_TYPE_PACKET) return (EINVAL); if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG) return (set_sched_class_config(sc, p->u.config.minmax)); if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS) return (set_sched_class_params(sc, &p->u.params, 1)); return (EINVAL); } static int set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) { struct port_info *pi = NULL; struct vi_info *vi; struct sge_txq *txq; uint32_t fw_mnem, fw_queue, fw_class; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setsq"); if (rc) return (rc); if (p->port >= sc->params.nports) { rc = EINVAL; goto done; } /* XXX: Only supported for the main VI. */ pi = sc->port[p->port]; vi = &pi->vi[0]; if (!(vi->flags & VI_INIT_DONE)) { /* tx queues not set up yet */ rc = EAGAIN; goto done; } if (!in_range(p->queue, 0, vi->ntxq - 1) || !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1)) { rc = EINVAL; goto done; } /* * Create a template for the FW_PARAMS_CMD mnemonic and value (TX * Scheduling Class in this case). */ fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH)); fw_class = p->cl < 0 ? 0xffffffff : p->cl; /* * If op.queue is non-negative, then we're only changing the scheduling * on a single specified TX queue. */ if (p->queue >= 0) { txq = &sc->sge.txq[vi->first_txq + p->queue]; fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); goto done; } /* * Change the scheduling on all the TX queues for the * interface. */ for_each_txq(vi, i, txq) { fw_queue = (fw_mnem | V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_queue, &fw_class); if (rc) goto done; } rc = 0; done: end_synchronized_op(sc, 0); return (rc); } int t4_os_find_pci_capability(struct adapter *sc, int cap) { int i; return (pci_find_cap(sc->dev, cap, &i) == 0 ? i : 0); } int t4_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t4_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } void t4_os_portmod_changed(const struct adapter *sc, int idx) { struct port_info *pi = sc->port[idx]; struct vi_info *vi; struct ifnet *ifp; int v; static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM" }; for_each_vi(pi, v, vi) { build_medialist(pi, &vi->media); } ifp = pi->vi[0].ifp; if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) if_printf(ifp, "transceiver unplugged.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) if_printf(ifp, "unknown transceiver inserted.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) if_printf(ifp, "unsupported transceiver inserted.\n"); else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) { if_printf(ifp, "%s transceiver inserted.\n", mod_str[pi->mod_type]); } else { if_printf(ifp, "transceiver (type %d) inserted.\n", pi->mod_type); } } void t4_os_link_changed(struct adapter *sc, int idx, int link_stat, int reason) { struct port_info *pi = sc->port[idx]; struct vi_info *vi; struct ifnet *ifp; int v; if (link_stat) pi->linkdnrc = -1; else { if (reason >= 0) pi->linkdnrc = reason; } for_each_vi(pi, v, vi) { ifp = vi->ifp; if (ifp == NULL) continue; if (link_stat) { ifp->if_baudrate = IF_Mbps(pi->link_cfg.speed); if_link_state_change(ifp, LINK_STATE_UP); } else { if_link_state_change(ifp, LINK_STATE_DOWN); } } } void t4_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; sx_slock(&t4_list_lock); SLIST_FOREACH(sc, &t4_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. */ func(sc, arg); } sx_sunlock(&t4_list_lock); } static int t4_open(struct cdev *dev, int flags, int type, struct thread *td) { return (0); } static int t4_close(struct cdev *dev, int flags, int type, struct thread *td) { return (0); } static int t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int rc; struct adapter *sc = dev->si_drv1; rc = priv_check(td, PRIV_DRIVER); if (rc != 0) return (rc); switch (cmd) { case CHELSIO_T4_GETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) edata->val = t4_read_reg(sc, edata->addr); else if (edata->size == 8) edata->val = t4_read_reg64(sc, edata->addr); else return (EINVAL); break; } case CHELSIO_T4_SETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) { if (edata->val & 0xffffffff00000000) return (EINVAL); t4_write_reg(sc, edata->addr, (uint32_t) edata->val); } else if (edata->size == 8) t4_write_reg64(sc, edata->addr, edata->val); else return (EINVAL); break; } case CHELSIO_T4_REGDUMP: { struct t4_regdump *regs = (struct t4_regdump *)data; int reglen = is_t4(sc) ? T4_REGDUMP_SIZE : T5_REGDUMP_SIZE; uint8_t *buf; if (regs->len < reglen) { regs->len = reglen; /* hint to the caller */ return (ENOBUFS); } regs->len = reglen; buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO); get_regs(sc, regs, buf); rc = copyout(buf, regs->data, reglen); free(buf, M_CXGBE); break; } case CHELSIO_T4_GET_FILTER_MODE: rc = get_filter_mode(sc, (uint32_t *)data); break; case CHELSIO_T4_SET_FILTER_MODE: rc = set_filter_mode(sc, *(uint32_t *)data); break; case CHELSIO_T4_GET_FILTER: rc = get_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_SET_FILTER: rc = set_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_DEL_FILTER: rc = del_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_GET_SGE_CONTEXT: rc = get_sge_context(sc, (struct t4_sge_context *)data); break; case CHELSIO_T4_LOAD_FW: rc = load_fw(sc, (struct t4_data *)data); break; case CHELSIO_T4_GET_MEM: rc = read_card_mem(sc, 2, (struct t4_mem_range *)data); break; case CHELSIO_T4_GET_I2C: rc = read_i2c(sc, (struct t4_i2c_data *)data); break; case CHELSIO_T4_CLEAR_STATS: { int i, v; u_int port_id = *(uint32_t *)data; struct port_info *pi; struct vi_info *vi; if (port_id >= sc->params.nports) return (EINVAL); pi = sc->port[port_id]; /* MAC stats */ t4_clr_port_stats(sc, pi->tx_chan); pi->tx_parse_error = 0; mtx_lock(&sc->reg_lock); for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) t4_clr_vi_stats(sc, vi->viid); } mtx_unlock(&sc->reg_lock); /* * Since this command accepts a port, clear stats for * all VIs on this port. */ for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) { struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *wrq; for_each_rxq(vi, i, rxq) { #if defined(INET) || defined(INET6) rxq->lro.lro_queued = 0; rxq->lro.lro_flushed = 0; #endif rxq->rxcsum = 0; rxq->vlan_extraction = 0; } for_each_txq(vi, i, txq) { txq->txcsum = 0; txq->tso_wrs = 0; txq->vlan_insertion = 0; txq->imm_wrs = 0; txq->sgl_wrs = 0; txq->txpkt_wrs = 0; txq->txpkts0_wrs = 0; txq->txpkts1_wrs = 0; txq->txpkts0_pkts = 0; txq->txpkts1_pkts = 0; mp_ring_reset_stats(txq->r); } #ifdef TCP_OFFLOAD /* nothing to clear for each ofld_rxq */ for_each_ofld_txq(vi, i, wrq) { wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } #endif if (IS_MAIN_VI(vi)) { wrq = &sc->sge.ctrlq[pi->port_id]; wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } } } break; } case CHELSIO_T4_SCHED_CLASS: rc = set_sched_class(sc, (struct t4_sched_params *)data); break; case CHELSIO_T4_SCHED_QUEUE: rc = set_sched_queue(sc, (struct t4_sched_queue *)data); break; case CHELSIO_T4_GET_TRACER: rc = t4_get_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_SET_TRACER: rc = t4_set_tracer(sc, (struct t4_tracer *)data); break; default: rc = EINVAL; } return (rc); } void t4_db_full(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } void t4_db_dropped(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } #ifdef TCP_OFFLOAD void t4_iscsi_init(struct adapter *sc, u_int tag_mask, const u_int *pgsz_order) { t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask); t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, V_HPZ0(pgsz_order[0]) | V_HPZ1(pgsz_order[1]) | V_HPZ2(pgsz_order[2]) | V_HPZ3(pgsz_order[3])); } static int toe_capability(struct vi_info *vi, int enable) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; ASSERT_SYNCHRONIZED_OP(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) { /* TOE is already enabled. */ return (0); } /* * We need the port's queues around so that we're able to send * and receive CPLs to/from the TOE even if the ifnet for this * port has never been UP'd administratively. */ if (!(vi->flags & VI_INIT_DONE)) { - rc = cxgbe_init_synchronized(vi); + rc = vi_full_init(vi); if (rc) return (rc); } if (!(pi->vi[0].flags & VI_INIT_DONE)) { - rc = cxgbe_init_synchronized(&pi->vi[0]); + rc = vi_full_init(&pi->vi[0]); if (rc) return (rc); } if (isset(&sc->offload_map, pi->port_id)) { /* TOE is enabled on another VI of this port. */ pi->uld_vis++; return (0); } if (!uld_active(sc, ULD_TOM)) { rc = t4_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t4_tom.ko before trying " "to enable TOE on a cxgbe interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM activated but flag not set", __func__)); } /* Activate iWARP and iSCSI too, if the modules are loaded. */ if (!uld_active(sc, ULD_IWARP)) (void) t4_activate_uld(sc, ULD_IWARP); if (!uld_active(sc, ULD_ISCSI)) (void) t4_activate_uld(sc, ULD_ISCSI); pi->uld_vis++; setbit(&sc->offload_map, pi->port_id); } else { pi->uld_vis--; if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0) return (0); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM never initialized?", __func__)); clrbit(&sc->offload_map, pi->port_id); } return (0); } /* * Add an upper layer driver to the global list. */ int t4_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t4_uld_list, ui, link); ui->refcount = 0; done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t4_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_activate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = EAGAIN; /* kldoad the module with this ULD and try again. */ sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { if (!(sc->flags & FULL_INIT_DONE)) { rc = adapter_full_init(sc); if (rc != 0) break; } rc = ui->activate(sc); if (rc == 0) { setbit(&sc->active_ulds, id); ui->refcount++; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int t4_deactivate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = ENXIO; sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) { clrbit(&sc->active_ulds, id); ui->refcount--; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int uld_active(struct adapter *sc, int uld_id) { MPASS(uld_id >= 0 && uld_id <= ULD_MAX); return (isset(&sc->active_ulds, uld_id)); } #endif /* * Come up with reasonable defaults for some of the tunables, provided they're * not set by the user (in which case we'll use the values as is). */ static void tweak_tunables(void) { int nc = mp_ncpus; /* our snapshot of the number of CPUs */ if (t4_ntxq10g < 1) { #ifdef RSS t4_ntxq10g = rss_getnumbuckets(); #else t4_ntxq10g = min(nc, NTXQ_10G); #endif } if (t4_ntxq1g < 1) { #ifdef RSS /* XXX: way too many for 1GbE? */ t4_ntxq1g = rss_getnumbuckets(); #else t4_ntxq1g = min(nc, NTXQ_1G); #endif } if (t4_ntxq_vi < 1) t4_ntxq_vi = min(nc, NTXQ_VI); if (t4_nrxq10g < 1) { #ifdef RSS t4_nrxq10g = rss_getnumbuckets(); #else t4_nrxq10g = min(nc, NRXQ_10G); #endif } if (t4_nrxq1g < 1) { #ifdef RSS /* XXX: way too many for 1GbE? */ t4_nrxq1g = rss_getnumbuckets(); #else t4_nrxq1g = min(nc, NRXQ_1G); #endif } if (t4_nrxq_vi < 1) t4_nrxq_vi = min(nc, NRXQ_VI); #ifdef TCP_OFFLOAD if (t4_nofldtxq10g < 1) t4_nofldtxq10g = min(nc, NOFLDTXQ_10G); if (t4_nofldtxq1g < 1) t4_nofldtxq1g = min(nc, NOFLDTXQ_1G); if (t4_nofldtxq_vi < 1) t4_nofldtxq_vi = min(nc, NOFLDTXQ_VI); if (t4_nofldrxq10g < 1) t4_nofldrxq10g = min(nc, NOFLDRXQ_10G); if (t4_nofldrxq1g < 1) t4_nofldrxq1g = min(nc, NOFLDRXQ_1G); if (t4_nofldrxq_vi < 1) t4_nofldrxq_vi = min(nc, NOFLDRXQ_VI); if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; if (t4_rdmacaps_allowed == -1) { t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP | FW_CAPS_CONFIG_RDMA_RDMAC; } if (t4_iscsicaps_allowed == -1) { t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU | FW_CAPS_CONFIG_ISCSI_TARGET_PDU | FW_CAPS_CONFIG_ISCSI_T10DIF; } #else if (t4_toecaps_allowed == -1) t4_toecaps_allowed = 0; if (t4_rdmacaps_allowed == -1) t4_rdmacaps_allowed = 0; if (t4_iscsicaps_allowed == -1) t4_iscsicaps_allowed = 0; #endif #ifdef DEV_NETMAP if (t4_nnmtxq_vi < 1) t4_nnmtxq_vi = min(nc, NNMTXQ_VI); if (t4_nnmrxq_vi < 1) t4_nnmrxq_vi = min(nc, NNMRXQ_VI); #endif if (t4_tmr_idx_10g < 0 || t4_tmr_idx_10g >= SGE_NTIMERS) t4_tmr_idx_10g = TMR_IDX_10G; if (t4_pktc_idx_10g < -1 || t4_pktc_idx_10g >= SGE_NCOUNTERS) t4_pktc_idx_10g = PKTC_IDX_10G; if (t4_tmr_idx_1g < 0 || t4_tmr_idx_1g >= SGE_NTIMERS) t4_tmr_idx_1g = TMR_IDX_1G; if (t4_pktc_idx_1g < -1 || t4_pktc_idx_1g >= SGE_NCOUNTERS) t4_pktc_idx_1g = PKTC_IDX_1G; if (t4_qsize_txq < 128) t4_qsize_txq = 128; if (t4_qsize_rxq < 128) t4_qsize_rxq = 128; while (t4_qsize_rxq & 7) t4_qsize_rxq++; t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX; } #ifdef DDB static void t4_dump_tcb(struct adapter *sc, int tid) { uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos; reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2); save = t4_read_reg(sc, reg); base = sc->memwin[2].mw_base; /* Dump TCB for the tid */ tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE); tcb_addr += tid * TCB_SIZE; if (is_t4(sc)) { pf = 0; win_pos = tcb_addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); win_pos = tcb_addr & ~0x7f; /* start must be 128B aligned */ } t4_write_reg(sc, reg, win_pos | pf); t4_read_reg(sc, reg); off = tcb_addr - win_pos; for (i = 0; i < 4; i++) { uint32_t buf[8]; for (j = 0; j < 8; j++, off += 4) buf[j] = htonl(t4_read_reg(sc, base + off)); db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); } t4_write_reg(sc, reg, save); t4_read_reg(sc, reg); } static void t4_dump_devlog(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e e; int i, first, j, m, nentries, rc; uint64_t ftstamp = UINT64_MAX; if (dparams->start == 0) { db_printf("devlog params not valid\n"); return; } nentries = dparams->size / sizeof(struct fw_devlog_e); m = fwmtype_to_hwmtype(dparams->memtype); /* Find the first entry. */ first = -1; for (i = 0; i < nentries && !db_pager_quit; i++) { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) break; if (e.timestamp == 0) break; e.timestamp = be64toh(e.timestamp); if (e.timestamp < ftstamp) { ftstamp = e.timestamp; first = i; } } if (first == -1) return; i = first; do { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) return; if (e.timestamp == 0) return; e.timestamp = be64toh(e.timestamp); e.seqno = be32toh(e.seqno); for (j = 0; j < 8; j++) e.params[j] = be32toh(e.params[j]); db_printf("%10d %15ju %8s %8s ", e.seqno, e.timestamp, (e.level < nitems(devlog_level_strings) ? devlog_level_strings[e.level] : "UNKNOWN"), (e.facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e.facility] : "UNKNOWN")); db_printf(e.fmt, e.params[0], e.params[1], e.params[2], e.params[3], e.params[4], e.params[5], e.params[6], e.params[7]); if (++i == nentries) i = 0; } while (i != first && !db_pager_quit); } static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table); _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table); DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL) { device_t dev; int t; bool valid; valid = false; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); valid = true; } db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 devlog \n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } t4_dump_devlog(device_get_softc(dev)); } DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL) { device_t dev; int radix, tid, t; bool valid; valid = false; radix = db_radix; db_radix = 10; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); t = db_read_token(); if (t == tNUMBER) { tid = db_tok_number; valid = true; } } db_radix = radix; db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 tcb \n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } if (tid < 0) { db_printf("invalid tid\n"); return; } t4_dump_tcb(device_get_softc(dev), tid); } #endif static struct sx mlu; /* mod load unload */ SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload"); static int mod_event(module_t mod, int cmd, void *arg) { int rc = 0; static int loaded = 0; switch (cmd) { case MOD_LOAD: sx_xlock(&mlu); if (loaded++ == 0) { t4_sge_modload(); sx_init(&t4_list_lock, "T4/T5 adapters"); SLIST_INIT(&t4_list); #ifdef TCP_OFFLOAD sx_init(&t4_uld_list_lock, "T4/T5 ULDs"); SLIST_INIT(&t4_uld_list); #endif t4_tracer_modload(); tweak_tunables(); } sx_xunlock(&mlu); break; case MOD_UNLOAD: sx_xlock(&mlu); if (--loaded == 0) { int tries; sx_slock(&t4_list_lock); if (!SLIST_EMPTY(&t4_list)) { rc = EBUSY; sx_sunlock(&t4_list_lock); goto done_unload; } #ifdef TCP_OFFLOAD sx_slock(&t4_uld_list_lock); if (!SLIST_EMPTY(&t4_uld_list)) { rc = EBUSY; sx_sunlock(&t4_uld_list_lock); sx_sunlock(&t4_list_lock); goto done_unload; } #endif tries = 0; while (tries++ < 5 && t4_sge_extfree_refs() != 0) { uprintf("%ju clusters with custom free routine " "still is use.\n", t4_sge_extfree_refs()); pause("t4unload", 2 * hz); } #ifdef TCP_OFFLOAD sx_sunlock(&t4_uld_list_lock); #endif sx_sunlock(&t4_list_lock); if (t4_sge_extfree_refs() == 0) { t4_tracer_modunload(); #ifdef TCP_OFFLOAD sx_destroy(&t4_uld_list_lock); #endif sx_destroy(&t4_list_lock); t4_sge_modunload(); loaded = 0; } else { rc = EBUSY; loaded++; /* undo earlier decrement */ } } done_unload: sx_xunlock(&mlu); break; } return (rc); } static devclass_t t4_devclass, t5_devclass; static devclass_t cxgbe_devclass, cxl_devclass; static devclass_t vcxgbe_devclass, vcxl_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); MODULE_DEPEND(t4nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t4nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0); MODULE_VERSION(t5nex, 1); MODULE_DEPEND(t5nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t5nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0); MODULE_VERSION(cxl, 1); DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0); MODULE_VERSION(vcxgbe, 1); DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0); MODULE_VERSION(vcxl, 1); Index: projects/vnet/sys/dev/cxgbe/tom/t4_listen.c =================================================================== --- projects/vnet/sys/dev/cxgbe/tom/t4_listen.c (revision 302276) +++ projects/vnet/sys/dev/cxgbe/tom/t4_listen.c (revision 302277) @@ -1,1600 +1,1600 @@ /*- * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" /* stid services */ static int alloc_stid(struct adapter *, struct listen_ctx *, int); static struct listen_ctx *lookup_stid(struct adapter *, int); static void free_stid(struct adapter *, struct listen_ctx *); /* lctx services */ static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *, struct vi_info *); static int free_lctx(struct adapter *, struct listen_ctx *); static void hold_lctx(struct listen_ctx *); static void listen_hash_add(struct adapter *, struct listen_ctx *); static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *); static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *); static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *); static inline void save_qids_in_mbuf(struct mbuf *, struct vi_info *); static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *); static void send_reset_synqe(struct toedev *, struct synq_entry *); static int alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6) { struct tid_info *t = &sc->tids; u_int stid, n, f, mask; struct stid_region *sr = &lctx->stid_region; /* * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in * the TCAM. The start of the stid region is properly aligned (the chip * requires each region to be 128-cell aligned). */ n = isipv6 ? 2 : 1; mask = n - 1; KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0, ("%s: stid region (%u, %u) not properly aligned. n = %u", __func__, t->stid_base, t->nstids, n)); mtx_lock(&t->stid_lock); if (n > t->nstids - t->stids_in_use) { mtx_unlock(&t->stid_lock); return (-1); } if (t->nstids_free_head >= n) { /* * This allocation will definitely succeed because the region * starts at a good alignment and we just checked we have enough * stids free. */ f = t->nstids_free_head & mask; t->nstids_free_head -= n + f; stid = t->nstids_free_head; TAILQ_INSERT_HEAD(&t->stids, sr, link); } else { struct stid_region *s; stid = t->nstids_free_head; TAILQ_FOREACH(s, &t->stids, link) { stid += s->used + s->free; f = stid & mask; if (s->free >= n + f) { stid -= n + f; s->free -= n + f; TAILQ_INSERT_AFTER(&t->stids, s, sr, link); goto allocated; } } if (__predict_false(stid != t->nstids)) { panic("%s: stids TAILQ (%p) corrupt." " At %d instead of %d at the end of the queue.", __func__, &t->stids, stid, t->nstids); } mtx_unlock(&t->stid_lock); return (-1); } allocated: sr->used = n; sr->free = f; t->stids_in_use += n; t->stid_tab[stid] = lctx; mtx_unlock(&t->stid_lock); KASSERT(((stid + t->stid_base) & mask) == 0, ("%s: EDOOFUS.", __func__)); return (stid + t->stid_base); } static struct listen_ctx * lookup_stid(struct adapter *sc, int stid) { struct tid_info *t = &sc->tids; return (t->stid_tab[stid - t->stid_base]); } static void free_stid(struct adapter *sc, struct listen_ctx *lctx) { struct tid_info *t = &sc->tids; struct stid_region *sr = &lctx->stid_region; struct stid_region *s; KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used)); mtx_lock(&t->stid_lock); s = TAILQ_PREV(sr, stid_head, link); if (s != NULL) s->free += sr->used + sr->free; else t->nstids_free_head += sr->used + sr->free; KASSERT(t->stids_in_use >= sr->used, ("%s: stids_in_use (%u) < stids being freed (%u)", __func__, t->stids_in_use, sr->used)); t->stids_in_use -= sr->used; TAILQ_REMOVE(&t->stids, sr, link); mtx_unlock(&t->stid_lock); } static struct listen_ctx * alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi) { struct listen_ctx *lctx; INP_WLOCK_ASSERT(inp); lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO); if (lctx == NULL) return (NULL); lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6); if (lctx->stid < 0) { free(lctx, M_CXGBE); return (NULL); } if (inp->inp_vflag & INP_IPV6 && !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) { struct tom_data *td = sc->tom_softc; lctx->ce = hold_lip(td, &inp->in6p_laddr); if (lctx->ce == NULL) { free(lctx, M_CXGBE); return (NULL); } } lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id]; lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq]; refcount_init(&lctx->refcount, 1); TAILQ_INIT(&lctx->synq); lctx->inp = inp; in_pcbref(inp); return (lctx); } /* Don't call this directly, use release_lctx instead */ static int free_lctx(struct adapter *sc, struct listen_ctx *lctx) { struct inpcb *inp = lctx->inp; struct tom_data *td = sc->tom_softc; INP_WLOCK_ASSERT(inp); KASSERT(lctx->refcount == 0, ("%s: refcount %d", __func__, lctx->refcount)); KASSERT(TAILQ_EMPTY(&lctx->synq), ("%s: synq not empty.", __func__)); KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid)); CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p", __func__, lctx->stid, lctx, lctx->inp); if (lctx->ce) release_lip(td, lctx->ce); free_stid(sc, lctx); free(lctx, M_CXGBE); return (in_pcbrele_wlocked(inp)); } static void hold_lctx(struct listen_ctx *lctx) { refcount_acquire(&lctx->refcount); } static inline uint32_t listen_hashfn(void *key, u_long mask) { return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask); } /* * Add a listen_ctx entry to the listen hash table. */ static void listen_hash_add(struct adapter *sc, struct listen_ctx *lctx) { struct tom_data *td = sc->tom_softc; int bucket = listen_hashfn(lctx->inp, td->listen_mask); mtx_lock(&td->lctx_hash_lock); LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link); td->lctx_count++; mtx_unlock(&td->lctx_hash_lock); } /* * Look for the listening socket's context entry in the hash and return it. */ static struct listen_ctx * listen_hash_find(struct adapter *sc, struct inpcb *inp) { struct tom_data *td = sc->tom_softc; int bucket = listen_hashfn(inp, td->listen_mask); struct listen_ctx *lctx; mtx_lock(&td->lctx_hash_lock); LIST_FOREACH(lctx, &td->listen_hash[bucket], link) { if (lctx->inp == inp) break; } mtx_unlock(&td->lctx_hash_lock); return (lctx); } /* * Removes the listen_ctx structure for inp from the hash and returns it. */ static struct listen_ctx * listen_hash_del(struct adapter *sc, struct inpcb *inp) { struct tom_data *td = sc->tom_softc; int bucket = listen_hashfn(inp, td->listen_mask); struct listen_ctx *lctx, *l; mtx_lock(&td->lctx_hash_lock); LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) { if (lctx->inp == inp) { LIST_REMOVE(lctx, link); td->lctx_count--; break; } } mtx_unlock(&td->lctx_hash_lock); return (lctx); } /* * Releases a hold on the lctx. Must be called with the listening socket's inp * locked. The inp may be freed by this function and it returns NULL to * indicate this. */ static struct inpcb * release_lctx(struct adapter *sc, struct listen_ctx *lctx) { struct inpcb *inp = lctx->inp; int inp_freed = 0; INP_WLOCK_ASSERT(inp); if (refcount_release(&lctx->refcount)) inp_freed = free_lctx(sc, lctx); return (inp_freed ? NULL : inp); } static void send_reset_synqe(struct toedev *tod, struct synq_entry *synqe) { struct adapter *sc = tod->tod_softc; struct mbuf *m = synqe->syn; struct ifnet *ifp = m->m_pkthdr.rcvif; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; struct wrqe *wr; struct fw_flowc_wr *flowc; struct cpl_abort_req *req; int txqid, rxqid, flowclen; struct sge_wrq *ofld_txq; struct sge_ofld_rxq *ofld_rxq; const int nparams = 6; unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; INP_WLOCK_ASSERT(synqe->lctx->inp); CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s", __func__, synqe, synqe->flags, synqe->tid, synqe->flags & TPF_ABORT_SHUTDOWN ? " (abort already in progress)" : ""); if (synqe->flags & TPF_ABORT_SHUTDOWN) return; /* abort already in progress */ synqe->flags |= TPF_ABORT_SHUTDOWN; get_qids_from_mbuf(m, &txqid, &rxqid); ofld_txq = &sc->sge.ofld_txq[txqid]; ofld_rxq = &sc->sge.ofld_rxq[rxqid]; /* The wrqe will have two WRs - a flowc followed by an abort_req */ flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); wr = alloc_wrqe(roundup2(flowclen, EQ_ESIZE) + sizeof(*req), ofld_txq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } flowc = wrtod(wr); req = (void *)((caddr_t)flowc + roundup2(flowclen, EQ_ESIZE)); /* First the flowc ... */ memset(flowc, 0, wr->wr_len); flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(nparams)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | V_FW_WR_FLOWID(synqe->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; flowc->mnemval[0].val = htobe32(pfvf); flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; flowc->mnemval[1].val = htobe32(pi->tx_chan); flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; flowc->mnemval[2].val = htobe32(pi->tx_chan); flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id); flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF; flowc->mnemval[4].val = htobe32(512); flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[5].val = htobe32(512); synqe->flags |= TPF_FLOWC_WR_SENT; /* ... then ABORT request */ INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid); req->rsvd0 = 0; /* don't have a snd_nxt */ req->rsvd1 = 1; /* no data sent yet */ req->cmd = CPL_ABORT_SEND_RST; t4_l2t_send(sc, wr, e); } static int create_server(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; struct cpl_pass_open_req *req; struct inpcb *inp = lctx->inp; wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); if (wr == NULL) { log(LOG_ERR, "%s: allocation failure", __func__); return (ENOMEM); } req = wrtod(wr); INIT_TP_WR(req, 0); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid)); req->local_port = inp->inp_lport; req->peer_port = 0; req->local_ip = inp->inp_laddr.s_addr; req->peer_ip = 0; req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); t4_wrq_tx(sc, wr); return (0); } static int create_server6(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; struct cpl_pass_open_req6 *req; struct inpcb *inp = lctx->inp; wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); if (wr == NULL) { log(LOG_ERR, "%s: allocation failure", __func__); return (ENOMEM); } req = wrtod(wr); INIT_TP_WR(req, 0); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid)); req->local_port = inp->inp_lport; req->peer_port = 0; req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; req->peer_ip_hi = 0; req->peer_ip_lo = 0; req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); t4_wrq_tx(sc, wr); return (0); } static int destroy_server(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; struct cpl_close_listsvr_req *req; wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); if (wr == NULL) { /* XXX */ panic("%s: allocation failure.", __func__); } req = wrtod(wr); INIT_TP_WR(req, 0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, lctx->stid)); req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id); req->rsvd = htobe16(0); t4_wrq_tx(sc, wr); return (0); } /* * Start a listening server by sending a passive open request to HW. * * Can't take adapter lock here and access to sc->flags, * sc->offload_map, if_capenable are all race prone. */ int t4_listen_start(struct toedev *tod, struct tcpcb *tp) { struct adapter *sc = tod->tod_softc; struct vi_info *vi; struct port_info *pi; struct inpcb *inp = tp->t_inpcb; struct listen_ctx *lctx; int i, rc, v; INP_WLOCK_ASSERT(inp); /* Don't start a hardware listener for any loopback address. */ if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr)) return (0); if (!(inp->inp_vflag & INP_IPV6) && IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr))) return (0); #if 0 ADAPTER_LOCK(sc); if (IS_BUSY(sc)) { log(LOG_ERR, "%s: listen request ignored, %s is busy", __func__, device_get_nameunit(sc->dev)); goto done; } KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM not initialized", __func__)); #endif /* - * Find a running VI with IFCAP_TOE (4 or 6). We'll use the first + * Find an initialized VI with IFCAP_TOE (4 or 6). We'll use the first * such VI's queues to send the passive open and receive the reply to * it. * * XXX: need a way to mark a port in use by offload. if_cxgbe should * then reject any attempt to bring down such a port (and maybe reject * attempts to disable IFCAP_TOE on that port too?). */ for_each_port(sc, i) { pi = sc->port[i]; for_each_vi(pi, v, vi) { - if (vi->ifp->if_drv_flags & IFF_DRV_RUNNING && + if (vi->flags & VI_INIT_DONE && vi->ifp->if_capenable & IFCAP_TOE) goto found; } } goto done; /* no port that's UP with IFCAP_TOE enabled */ found: if (listen_hash_find(sc, inp) != NULL) goto done; /* already setup */ lctx = alloc_lctx(sc, inp, vi); if (lctx == NULL) { log(LOG_ERR, "%s: listen request ignored, %s couldn't allocate lctx\n", __func__, device_get_nameunit(sc->dev)); goto done; } listen_hash_add(sc, lctx); CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x", __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp, inp->inp_vflag); if (inp->inp_vflag & INP_IPV6) rc = create_server6(sc, lctx); else rc = create_server(sc, lctx); if (rc != 0) { log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n", __func__, device_get_nameunit(sc->dev), rc); (void) listen_hash_del(sc, inp); inp = release_lctx(sc, lctx); /* can't be freed, host stack has a reference */ KASSERT(inp != NULL, ("%s: inp freed", __func__)); goto done; } lctx->flags |= LCTX_RPL_PENDING; done: #if 0 ADAPTER_UNLOCK(sc); #endif return (0); } int t4_listen_stop(struct toedev *tod, struct tcpcb *tp) { struct listen_ctx *lctx; struct adapter *sc = tod->tod_softc; struct inpcb *inp = tp->t_inpcb; struct synq_entry *synqe; INP_WLOCK_ASSERT(inp); lctx = listen_hash_del(sc, inp); if (lctx == NULL) return (ENOENT); /* no hardware listener for this inp */ CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid, lctx, lctx->flags); /* * If the reply to the PASS_OPEN is still pending we'll wait for it to * arrive and clean up when it does. */ if (lctx->flags & LCTX_RPL_PENDING) { KASSERT(TAILQ_EMPTY(&lctx->synq), ("%s: synq not empty.", __func__)); return (EINPROGRESS); } /* * The host stack will abort all the connections on the listening * socket's so_comp. It doesn't know about the connections on the synq * so we need to take care of those. */ TAILQ_FOREACH(synqe, &lctx->synq, link) { if (synqe->flags & TPF_SYNQE_HAS_L2TE) send_reset_synqe(tod, synqe); } destroy_server(sc, lctx); return (0); } static inline void hold_synqe(struct synq_entry *synqe) { refcount_acquire(&synqe->refcnt); } static inline void release_synqe(struct synq_entry *synqe) { if (refcount_release(&synqe->refcnt)) { int needfree = synqe->flags & TPF_SYNQE_NEEDFREE; m_freem(synqe->syn); if (needfree) free(synqe, M_CXGBE); } } void t4_syncache_added(struct toedev *tod __unused, void *arg) { struct synq_entry *synqe = arg; hold_synqe(synqe); } void t4_syncache_removed(struct toedev *tod __unused, void *arg) { struct synq_entry *synqe = arg; release_synqe(synqe); } /* XXX */ extern void tcp_dooptions(struct tcpopt *, u_char *, int, int); int t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m) { struct adapter *sc = tod->tod_softc; struct synq_entry *synqe = arg; struct wrqe *wr; struct l2t_entry *e; struct tcpopt to; struct ip *ip = mtod(m, struct ip *); struct tcphdr *th; wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr); if (wr == NULL) { m_freem(m); return (EALREADY); } if (ip->ip_v == IPVERSION) th = (void *)(ip + 1); else th = (void *)((struct ip6_hdr *)ip + 1); bzero(&to, sizeof(to)); tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th), TO_SYN); /* save these for later */ synqe->iss = be32toh(th->th_seq); synqe->ts = to.to_tsval; if (is_t5(sc)) { struct cpl_t5_pass_accept_rpl *rpl5 = wrtod(wr); rpl5->iss = th->th_seq; } e = &sc->l2t->l2tab[synqe->l2e_idx]; t4_l2t_send(sc, wr, e); m_freem(m); /* don't need this any more */ return (0); } static int do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1); int stid = GET_TID(cpl); unsigned int status = cpl->status; struct listen_ctx *lctx = lookup_stid(sc, stid); struct inpcb *inp = lctx->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PASS_OPEN_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); INP_WLOCK(inp); CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x", __func__, stid, status, lctx->flags); lctx->flags &= ~LCTX_RPL_PENDING; if (status != CPL_ERR_NONE) log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status); #ifdef INVARIANTS /* * If the inp has been dropped (listening socket closed) then * listen_stop must have run and taken the inp out of the hash. */ if (inp->inp_flags & INP_DROPPED) { KASSERT(listen_hash_del(sc, inp) == NULL, ("%s: inp %p still in listen hash", __func__, inp)); } #endif if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) { if (release_lctx(sc, lctx) != NULL) INP_WUNLOCK(inp); return (status); } /* * Listening socket stopped listening earlier and now the chip tells us * it has started the hardware listener. Stop it; the lctx will be * released in do_close_server_rpl. */ if (inp->inp_flags & INP_DROPPED) { destroy_server(sc, lctx); INP_WUNLOCK(inp); return (status); } /* * Failed to start hardware listener. Take inp out of the hash and * release our reference on it. An error message has been logged * already. */ if (status != CPL_ERR_NONE) { listen_hash_del(sc, inp); if (release_lctx(sc, lctx) != NULL) INP_WUNLOCK(inp); return (status); } /* hardware listener open for business */ INP_WUNLOCK(inp); return (status); } static int do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1); int stid = GET_TID(cpl); unsigned int status = cpl->status; struct listen_ctx *lctx = lookup_stid(sc, stid); struct inpcb *inp = lctx->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status); if (status != CPL_ERR_NONE) { log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n", __func__, status, stid); return (status); } INP_WLOCK(inp); inp = release_lctx(sc, lctx); if (inp != NULL) INP_WUNLOCK(inp); return (status); } static void done_with_synqe(struct adapter *sc, struct synq_entry *synqe) { struct listen_ctx *lctx = synqe->lctx; struct inpcb *inp = lctx->inp; struct vi_info *vi = synqe->syn->m_pkthdr.rcvif->if_softc; struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx]; INP_WLOCK_ASSERT(inp); TAILQ_REMOVE(&lctx->synq, synqe, link); inp = release_lctx(sc, lctx); if (inp) INP_WUNLOCK(inp); remove_tid(sc, synqe->tid); release_tid(sc, synqe->tid, &sc->sge.ctrlq[vi->pi->port_id]); t4_l2t_release(e); release_synqe(synqe); /* removed from synq list */ } int do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct synq_entry *synqe = lookup_tid(sc, tid); struct listen_ctx *lctx = synqe->lctx; struct inpcb *inp = lctx->inp; int txqid; struct sge_wrq *ofld_txq; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_REQ_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__)); CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); if (negative_advice(cpl->status)) return (0); /* Ignore negative advice */ INP_WLOCK(inp); get_qids_from_mbuf(synqe->syn, &txqid, NULL); ofld_txq = &sc->sge.ofld_txq[txqid]; /* * If we'd initiated an abort earlier the reply to it is responsible for * cleaning up resources. Otherwise we tear everything down right here * right now. We owe the T4 a CPL_ABORT_RPL no matter what. */ if (synqe->flags & TPF_ABORT_SHUTDOWN) { INP_WUNLOCK(inp); goto done; } done_with_synqe(sc, synqe); /* inp lock released by done_with_synqe */ done: send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); return (0); } int do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct synq_entry *synqe = lookup_tid(sc, tid); struct listen_ctx *lctx = synqe->lctx; struct inpcb *inp = lctx->inp; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_ABORT_RPL_RSS, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__)); CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); INP_WLOCK(inp); KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: wasn't expecting abort reply for synqe %p (0x%x)", __func__, synqe, synqe->flags)); done_with_synqe(sc, synqe); /* inp lock released by done_with_synqe */ return (0); } void t4_offload_socket(struct toedev *tod, void *arg, struct socket *so) { struct adapter *sc = tod->tod_softc; struct synq_entry *synqe = arg; #ifdef INVARIANTS struct inpcb *inp = sotoinpcb(so); #endif struct cpl_pass_establish *cpl = mtod(synqe->syn, void *); struct toepcb *toep = *(struct toepcb **)(cpl + 1); INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */ INP_WLOCK_ASSERT(inp); KASSERT(synqe->flags & TPF_SYNQE, ("%s: %p not a synq_entry?", __func__, arg)); offload_socket(so, toep); make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt); toep->flags |= TPF_CPL_PENDING; update_tid(sc, synqe->tid, toep); synqe->flags |= TPF_SYNQE_EXPANDED; } static inline void save_qids_in_mbuf(struct mbuf *m, struct vi_info *vi) { uint32_t txqid, rxqid; txqid = (arc4random() % vi->nofldtxq) + vi->first_ofld_txq; rxqid = (arc4random() % vi->nofldrxq) + vi->first_ofld_rxq; m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff); } static inline void get_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid) { if (txqid) *txqid = m->m_pkthdr.flowid >> 16; if (rxqid) *rxqid = m->m_pkthdr.flowid & 0xffff; } /* * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to * store some state temporarily. */ static struct synq_entry * mbuf_to_synqe(struct mbuf *m) { int len = roundup2(sizeof (struct synq_entry), 8); int tspace = M_TRAILINGSPACE(m); struct synq_entry *synqe = NULL; if (tspace < len) { synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT); if (synqe == NULL) return (NULL); synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE; } else { synqe = (void *)(m->m_data + m->m_len + tspace - len); synqe->flags = TPF_SYNQE; } return (synqe); } static void t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to) { bzero(to, sizeof(*to)); if (t4opt->mss) { to->to_flags |= TOF_MSS; to->to_mss = be16toh(t4opt->mss); } if (t4opt->wsf) { to->to_flags |= TOF_SCALE; to->to_wscale = t4opt->wsf; } if (t4opt->tstamp) to->to_flags |= TOF_TS; if (t4opt->sack) to->to_flags |= TOF_SACKPERM; } /* * Options2 for passive open. */ static uint32_t calc_opt2p(struct adapter *sc, struct port_info *pi, int rxqid, const struct tcp_options *tcpopt, struct tcphdr *th, int ulp_mode) { struct sge_ofld_rxq *ofld_rxq = &sc->sge.ofld_rxq[rxqid]; uint32_t opt2; opt2 = V_TX_QUEUE(sc->params.tp.tx_modq[pi->tx_chan]) | F_RSS_QUEUE_VALID | V_RSS_QUEUE(ofld_rxq->iq.abs_id); if (V_tcp_do_rfc1323) { if (tcpopt->tstamp) opt2 |= F_TSTAMPS_EN; if (tcpopt->sack) opt2 |= F_SACK_EN; if (tcpopt->wsf <= 14) opt2 |= F_WND_SCALE_EN; } if (V_tcp_do_ecn && th->th_flags & (TH_ECE | TH_CWR)) opt2 |= F_CCTRL_ECN; /* RX_COALESCE is always a valid value (0 or M_RX_COALESCE). */ if (is_t4(sc)) opt2 |= F_RX_COALESCE_VALID; else { opt2 |= F_T5_OPT_2_VALID; opt2 |= F_CONG_CNTRL_VALID; /* OPT_2_ISS really, for T5 */ } if (sc->tt.rx_coalesce) opt2 |= V_RX_COALESCE(M_RX_COALESCE); #ifdef USE_DDP_RX_FLOW_CONTROL if (ulp_mode == ULP_MODE_TCPDDP) opt2 |= F_RX_FC_VALID | F_RX_FC_DDP; #endif return htobe32(opt2); } static void pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc, struct tcphdr *th) { const struct cpl_pass_accept_req *cpl = mtod(m, const void *); const struct ether_header *eh; unsigned int hlen = be32toh(cpl->hdr_len); uintptr_t l3hdr; const struct tcphdr *tcp; eh = (const void *)(cpl + 1); l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen)); tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen)); if (inc) { bzero(inc, sizeof(*inc)); inc->inc_fport = tcp->th_sport; inc->inc_lport = tcp->th_dport; if (((struct ip *)l3hdr)->ip_v == IPVERSION) { const struct ip *ip = (const void *)l3hdr; inc->inc_faddr = ip->ip_src; inc->inc_laddr = ip->ip_dst; } else { const struct ip6_hdr *ip6 = (const void *)l3hdr; inc->inc_flags |= INC_ISIPV6; inc->inc6_faddr = ip6->ip6_src; inc->inc6_laddr = ip6->ip6_dst; } } if (th) { bcopy(tcp, th, sizeof(*th)); tcp_fields_to_host(th); /* just like tcp_input */ } } static struct l2t_entry * get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp, struct in_conninfo *inc) { struct l2t_entry *e; struct sockaddr_in6 sin6; struct sockaddr *dst = (void *)&sin6; if (inc->inc_flags & INC_ISIPV6) { struct nhop6_basic nh6; bzero(dst, sizeof(struct sockaddr_in6)); dst->sa_len = sizeof(struct sockaddr_in6); dst->sa_family = AF_INET6; if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) { /* no need for route lookup */ e = t4_l2t_get(pi, ifp, dst); return (e); } if (fib6_lookup_nh_basic(RT_DEFAULT_FIB, &inc->inc6_faddr, 0, 0, 0, &nh6) != 0) return (NULL); if (nh6.nh_ifp != ifp) return (NULL); ((struct sockaddr_in6 *)dst)->sin6_addr = nh6.nh_addr; } else { struct nhop4_basic nh4; dst->sa_len = sizeof(struct sockaddr_in); dst->sa_family = AF_INET; if (fib4_lookup_nh_basic(RT_DEFAULT_FIB, inc->inc_faddr, 0, 0, &nh4) != 0) return (NULL); if (nh4.nh_ifp != ifp) return (NULL); ((struct sockaddr_in *)dst)->sin_addr = nh4.nh_addr; } e = t4_l2t_get(pi, ifp, dst); return (e); } #define REJECT_PASS_ACCEPT() do { \ reject_reason = __LINE__; \ goto reject; \ } while (0) /* * The context associated with a tid entry via insert_tid could be a synq_entry * or a toepcb. The only way CPL handlers can tell is via a bit in these flags. */ CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags)); /* * Incoming SYN on a listening socket. * * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe, * etc. */ static int do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; struct toedev *tod; const struct cpl_pass_accept_req *cpl = mtod(m, const void *); struct cpl_pass_accept_rpl *rpl; struct wrqe *wr; unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid)); unsigned int tid = GET_TID(cpl); struct listen_ctx *lctx = lookup_stid(sc, stid); struct inpcb *inp; struct socket *so; struct in_conninfo inc; struct tcphdr th; struct tcpopt to; struct port_info *pi; struct vi_info *vi; struct ifnet *hw_ifp, *ifp; struct l2t_entry *e = NULL; int rscale, mtu_idx, rx_credits, rxqid, ulp_mode; struct synq_entry *synqe = NULL; int reject_reason, v; uint16_t vid; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PASS_ACCEPT_REQ, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid, lctx); pass_accept_req_to_protohdrs(m, &inc, &th); t4opt_to_tcpopt(&cpl->tcpopt, &to); pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))]; /* * Use the MAC index to lookup the associated VI. If this SYN * didn't match a perfect MAC filter, punt. */ if (!(be16toh(cpl->l2info) & F_SYN_XACT_MATCH)) { m_freem(m); m = NULL; REJECT_PASS_ACCEPT(); } for_each_vi(pi, v, vi) { if (vi->xact_addr_filt == G_SYN_MAC_IDX(be16toh(cpl->l2info))) goto found; } m_freem(m); m = NULL; REJECT_PASS_ACCEPT(); found: hw_ifp = vi->ifp; /* the (v)cxgbeX ifnet */ m->m_pkthdr.rcvif = hw_ifp; tod = TOEDEV(hw_ifp); /* * Figure out if there is a pseudo interface (vlan, lagg, etc.) * involved. Don't offload if the SYN had a VLAN tag and the vid * doesn't match anything on this interface. * * XXX: lagg support, lagg + vlan support. */ vid = EVL_VLANOFTAG(be16toh(cpl->vlan)); if (vid != 0xfff) { ifp = VLAN_DEVAT(hw_ifp, vid); if (ifp == NULL) REJECT_PASS_ACCEPT(); } else ifp = hw_ifp; /* * Don't offload if the peer requested a TCP option that's not known to * the silicon. */ if (cpl->tcpopt.unknown) REJECT_PASS_ACCEPT(); if (inc.inc_flags & INC_ISIPV6) { /* Don't offload if the ifcap isn't enabled */ if ((ifp->if_capenable & IFCAP_TOE6) == 0) REJECT_PASS_ACCEPT(); /* * SYN must be directed to an IP6 address on this ifnet. This * is more restrictive than in6_localip. */ if (!in6_ifhasaddr(ifp, &inc.inc6_laddr)) REJECT_PASS_ACCEPT(); } else { /* Don't offload if the ifcap isn't enabled */ if ((ifp->if_capenable & IFCAP_TOE4) == 0) REJECT_PASS_ACCEPT(); /* * SYN must be directed to an IP address on this ifnet. This * is more restrictive than in_localip. */ if (!in_ifhasaddr(ifp, inc.inc_laddr)) REJECT_PASS_ACCEPT(); } e = get_l2te_for_nexthop(pi, ifp, &inc); if (e == NULL) REJECT_PASS_ACCEPT(); synqe = mbuf_to_synqe(m); if (synqe == NULL) REJECT_PASS_ACCEPT(); wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) : sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[pi->port_id]); if (wr == NULL) REJECT_PASS_ACCEPT(); rpl = wrtod(wr); INP_INFO_RLOCK(&V_tcbinfo); /* for 4-tuple check */ /* Don't offload if the 4-tuple is already in use */ if (toe_4tuple_check(&inc, &th, ifp) != 0) { INP_INFO_RUNLOCK(&V_tcbinfo); free(wr, M_CXGBE); REJECT_PASS_ACCEPT(); } INP_INFO_RUNLOCK(&V_tcbinfo); inp = lctx->inp; /* listening socket, not owned by TOE */ INP_WLOCK(inp); /* Don't offload if the listening socket has closed */ if (__predict_false(inp->inp_flags & INP_DROPPED)) { /* * The listening socket has closed. The reply from the TOE to * our CPL_CLOSE_LISTSRV_REQ will ultimately release all * resources tied to this listen context. */ INP_WUNLOCK(inp); free(wr, M_CXGBE); REJECT_PASS_ACCEPT(); } so = inp->inp_socket; CURVNET_SET(so->so_vnet); mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss)); rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0; SOCKBUF_LOCK(&so->so_rcv); /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); SOCKBUF_UNLOCK(&so->so_rcv); save_qids_in_mbuf(m, vi); get_qids_from_mbuf(m, NULL, &rxqid); if (is_t4(sc)) INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid); else { struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl; INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid); } if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) { ulp_mode = ULP_MODE_TCPDDP; synqe->flags |= TPF_SYNQE_TCPDDP; } else ulp_mode = ULP_MODE_NONE; rpl->opt0 = calc_opt0(so, vi, e, mtu_idx, rscale, rx_credits, ulp_mode); rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode); synqe->tid = tid; synqe->lctx = lctx; synqe->syn = m; m = NULL; refcount_init(&synqe->refcnt, 1); /* 1 means extra hold */ synqe->l2e_idx = e->idx; synqe->rcv_bufsize = rx_credits; atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr); insert_tid(sc, tid, synqe); TAILQ_INSERT_TAIL(&lctx->synq, synqe, link); hold_synqe(synqe); /* hold for the duration it's in the synq */ hold_lctx(lctx); /* A synqe on the list has a ref on its lctx */ /* * If all goes well t4_syncache_respond will get called during * syncache_add. Note that syncache_add releases the pcb lock. */ toe_syncache_add(&inc, &to, &th, inp, tod, synqe); INP_UNLOCK_ASSERT(inp); /* ok to assert, we have a ref on the inp */ CURVNET_RESTORE(); /* * If we replied during syncache_add (synqe->wr has been consumed), * good. Otherwise, set it to 0 so that further syncache_respond * attempts by the kernel will be ignored. */ if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) { /* * syncache may or may not have a hold on the synqe, which may * or may not be stashed in the original SYN mbuf passed to us. * Just copy it over instead of dealing with all possibilities. */ m = m_dup(synqe->syn, M_NOWAIT); if (m) m->m_pkthdr.rcvif = hw_ifp; remove_tid(sc, synqe->tid); free(wr, M_CXGBE); /* Yank the synqe out of the lctx synq. */ INP_WLOCK(inp); TAILQ_REMOVE(&lctx->synq, synqe, link); release_synqe(synqe); /* removed from synq list */ inp = release_lctx(sc, lctx); if (inp) INP_WUNLOCK(inp); release_synqe(synqe); /* extra hold */ REJECT_PASS_ACCEPT(); } CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK", __func__, stid, tid, lctx, synqe); INP_WLOCK(inp); synqe->flags |= TPF_SYNQE_HAS_L2TE; if (__predict_false(inp->inp_flags & INP_DROPPED)) { /* * Listening socket closed but tod_listen_stop did not abort * this tid because there was no L2T entry for the tid at that * time. Abort it now. The reply to the abort will clean up. */ CTR6(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT", __func__, stid, tid, lctx, synqe, synqe->flags); if (!(synqe->flags & TPF_SYNQE_EXPANDED)) send_reset_synqe(tod, synqe); INP_WUNLOCK(inp); release_synqe(synqe); /* extra hold */ return (__LINE__); } INP_WUNLOCK(inp); release_synqe(synqe); /* extra hold */ return (0); reject: CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid, reject_reason); if (e) t4_l2t_release(e); release_tid(sc, tid, lctx->ctrlq); if (__predict_true(m != NULL)) { m_adj(m, sizeof(*cpl)); m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; hw_ifp->if_input(hw_ifp, m); } return (reject_reason); } static void synqe_to_protohdrs(struct synq_entry *synqe, const struct cpl_pass_establish *cpl, struct in_conninfo *inc, struct tcphdr *th, struct tcpopt *to) { uint16_t tcp_opt = be16toh(cpl->tcp_opt); /* start off with the original SYN */ pass_accept_req_to_protohdrs(synqe->syn, inc, th); /* modify parts to make it look like the ACK to our SYN|ACK */ th->th_flags = TH_ACK; th->th_ack = synqe->iss + 1; th->th_seq = be32toh(cpl->rcv_isn); bzero(to, sizeof(*to)); if (G_TCPOPT_TSTAMP(tcp_opt)) { to->to_flags |= TOF_TS; to->to_tsecr = synqe->ts; } } static int do_pass_establish(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; struct vi_info *vi; struct ifnet *ifp; const struct cpl_pass_establish *cpl = (const void *)(rss + 1); #if defined(KTR) || defined(INVARIANTS) unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid)); #endif unsigned int tid = GET_TID(cpl); struct synq_entry *synqe = lookup_tid(sc, tid); struct listen_ctx *lctx = synqe->lctx; struct inpcb *inp = lctx->inp, *new_inp; struct socket *so; struct tcphdr th; struct tcpopt to; struct in_conninfo inc; struct toepcb *toep; u_int txqid, rxqid; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); #endif KASSERT(opcode == CPL_PASS_ESTABLISH, ("%s: unexpected opcode 0x%x", __func__, opcode)); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__)); KASSERT(synqe->flags & TPF_SYNQE, ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe)); INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */ INP_WLOCK(inp); CTR6(KTR_CXGBE, "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x", __func__, stid, tid, synqe, synqe->flags, inp->inp_flags); if (__predict_false(inp->inp_flags & INP_DROPPED)) { if (synqe->flags & TPF_SYNQE_HAS_L2TE) { KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, ("%s: listen socket closed but tid %u not aborted.", __func__, tid)); } INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); return (0); } ifp = synqe->syn->m_pkthdr.rcvif; vi = ifp->if_softc; KASSERT(vi->pi->adapter == sc, ("%s: vi %p, sc %p mismatch", __func__, vi, sc)); get_qids_from_mbuf(synqe->syn, &txqid, &rxqid); KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0], ("%s: CPL arrived on unexpected rxq. %d %d", __func__, rxqid, (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0]))); toep = alloc_toepcb(vi, txqid, rxqid, M_NOWAIT); if (toep == NULL) { reset: /* * The reply to this abort will perform final cleanup. There is * no need to check for HAS_L2TE here. We can be here only if * we responded to the PASS_ACCEPT_REQ, and our response had the * L2T idx. */ send_reset_synqe(TOEDEV(ifp), synqe); INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); return (0); } toep->tid = tid; toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx]; if (synqe->flags & TPF_SYNQE_TCPDDP) set_tcpddp_ulp_mode(toep); else toep->ulp_mode = ULP_MODE_NONE; /* opt0 rcv_bufsiz initially, assumes its normal meaning later */ toep->rx_credits = synqe->rcv_bufsize; so = inp->inp_socket; KASSERT(so != NULL, ("%s: socket is NULL", __func__)); /* Come up with something that syncache_expand should be ok with. */ synqe_to_protohdrs(synqe, cpl, &inc, &th, &to); /* * No more need for anything in the mbuf that carried the * CPL_PASS_ACCEPT_REQ. Drop the CPL_PASS_ESTABLISH and toep pointer * there. XXX: bad form but I don't want to increase the size of synqe. */ m = synqe->syn; KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len, ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len)); bcopy(cpl, mtod(m, void *), sizeof(*cpl)); *(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep; if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) { free_toepcb(toep); goto reset; } /* New connection inpcb is already locked by syncache_expand(). */ new_inp = sotoinpcb(so); INP_WLOCK_ASSERT(new_inp); /* * This is for the unlikely case where the syncache entry that we added * has been evicted from the syncache, but the syncache_expand above * works because of syncookies. * * XXX: we've held the tcbinfo lock throughout so there's no risk of * anyone accept'ing a connection before we've installed our hooks, but * this somewhat defeats the purpose of having a tod_offload_socket :-( */ if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) { tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0); t4_offload_socket(TOEDEV(ifp), synqe, so); } INP_WUNLOCK(new_inp); /* Done with the synqe */ TAILQ_REMOVE(&lctx->synq, synqe, link); inp = release_lctx(sc, lctx); if (inp != NULL) INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); release_synqe(synqe); return (0); } void t4_init_listen_cpl_handlers(struct adapter *sc) { t4_register_cpl_handler(sc, CPL_PASS_OPEN_RPL, do_pass_open_rpl); t4_register_cpl_handler(sc, CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl); t4_register_cpl_handler(sc, CPL_PASS_ACCEPT_REQ, do_pass_accept_req); t4_register_cpl_handler(sc, CPL_PASS_ESTABLISH, do_pass_establish); } #endif Index: projects/vnet/sys/dev/usb/quirk/usb_quirk.c =================================================================== --- projects/vnet/sys/dev/usb/quirk/usb_quirk.c (revision 302276) +++ projects/vnet/sys/dev/usb/quirk/usb_quirk.c (revision 302277) @@ -1,1016 +1,1017 @@ /* $FreeBSD$ */ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. All rights reserved. * Copyright (c) 1998 Lennart Augustsson. All rights reserved. * Copyright (c) 2008 Hans Petter Selasky. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "usbdevs.h" #define USB_DEBUG_VAR usb_debug #include #include #include MODULE_DEPEND(usb_quirk, usb, 1, 1, 1); MODULE_VERSION(usb_quirk, 1); #define USB_DEV_QUIRKS_MAX 384 #define USB_SUB_QUIRKS_MAX 8 #define USB_QUIRK_ENVROOT "hw.usb.quirk." struct usb_quirk_entry { uint16_t vid; uint16_t pid; uint16_t lo_rev; uint16_t hi_rev; uint16_t quirks[USB_SUB_QUIRKS_MAX]; }; static struct mtx usb_quirk_mtx; #define USB_QUIRK_VP(v,p,l,h,...) \ { .vid = (v), .pid = (p), .lo_rev = (l), .hi_rev = (h), \ .quirks = { __VA_ARGS__ } } #define USB_QUIRK(v,p,l,h,...) \ USB_QUIRK_VP(USB_VENDOR_##v, USB_PRODUCT_##v##_##p, l, h, __VA_ARGS__) static struct usb_quirk_entry usb_quirks[USB_DEV_QUIRKS_MAX] = { USB_QUIRK(ASUS, LCM, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(INSIDEOUT, EDGEPORT4, 0x094, 0x094, UQ_SWAP_UNICODE), USB_QUIRK(DALLAS, J6502, 0x0a2, 0x0a2, UQ_BAD_ADC), USB_QUIRK(DALLAS, J6502, 0x0a2, 0x0a2, UQ_AU_NO_XU), USB_QUIRK(ALTEC, ADA70, 0x103, 0x103, UQ_BAD_ADC), USB_QUIRK(ALTEC, ASC495, 0x000, 0x000, UQ_BAD_AUDIO), USB_QUIRK(QTRONIX, 980N, 0x110, 0x110, UQ_SPUR_BUT_UP), USB_QUIRK(ALCOR2, KBD_HUB, 0x001, 0x001, UQ_SPUR_BUT_UP), USB_QUIRK(MCT, HUB0100, 0x102, 0x102, UQ_BUS_POWERED), USB_QUIRK(MCT, USB232, 0x102, 0x102, UQ_BUS_POWERED), USB_QUIRK(TI, UTUSB41, 0x110, 0x110, UQ_POWER_CLAIM), USB_QUIRK(TELEX, MIC1, 0x009, 0x009, UQ_AU_NO_FRAC), USB_QUIRK(SILICONPORTALS, YAPPHONE, 0x100, 0x100, UQ_AU_INP_ASYNC), USB_QUIRK(LOGITECH, UN53B, 0x0000, 0xffff, UQ_NO_STRINGS), USB_QUIRK(REALTEK, RTL8196EU, 0x0000, 0xffff, UQ_CFG_INDEX_1), USB_QUIRK(REALTEK, RTL8153, 0x0000, 0xffff, UQ_CFG_INDEX_1), USB_QUIRK(ELSA, MODEM1, 0x0000, 0xffff, UQ_CFG_INDEX_1), USB_QUIRK(PLANEX2, MZKUE150N, 0x0000, 0xffff, UQ_CFG_INDEX_1), USB_QUIRK(CISCOLINKSYS, USB3GIGV1, 0x0000, 0xffff, UQ_CFG_INDEX_1), /* Quirks for printer devices */ USB_QUIRK(HP, 895C, 0x0000, 0xffff, UQ_BROKEN_BIDIR), USB_QUIRK(HP, 880C, 0x0000, 0xffff, UQ_BROKEN_BIDIR), USB_QUIRK(HP, 815C, 0x0000, 0xffff, UQ_BROKEN_BIDIR), USB_QUIRK(HP, 810C, 0x0000, 0xffff, UQ_BROKEN_BIDIR), USB_QUIRK(HP, 830C, 0x0000, 0xffff, UQ_BROKEN_BIDIR), USB_QUIRK(HP, 1220C, 0x0000, 0xffff, UQ_BROKEN_BIDIR), USB_QUIRK(XEROX, WCM15, 0x0000, 0xffff, UQ_BROKEN_BIDIR), /* Devices which should be ignored by uhid */ USB_QUIRK(APC, UPS, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(BELKIN, F6C550AVR, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(CYBERPOWER, 1500CAVRLCD, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(CYPRESS, SILVERSHIELD, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(DELORME, EARTHMATE, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(DREAMLINK, DL100B, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(ITUNERNET, USBLCD2X20, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(ITUNERNET, USBLCD4X20, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(LIEBERT, POWERSURE_PXT, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(LIEBERT2, PSI1000, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(MGE, UPS1, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(MGE, UPS2, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(APPLE, IPHONE, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(APPLE, IPHONE_3G, 0x0000, 0xffff, UQ_HID_IGNORE), USB_QUIRK(MEGATEC, UPS, 0x0000, 0xffff, UQ_HID_IGNORE), /* Devices which should be ignored by both ukbd and uhid */ USB_QUIRK(CYPRESS, WISPY1A, 0x0000, 0xffff, UQ_KBD_IGNORE, UQ_HID_IGNORE), USB_QUIRK(METAGEEK, WISPY1B, 0x0000, 0xffff, UQ_KBD_IGNORE, UQ_HID_IGNORE), USB_QUIRK(METAGEEK, WISPY24X, 0x0000, 0xffff, UQ_KBD_IGNORE, UQ_HID_IGNORE), USB_QUIRK(METAGEEK2, WISPYDBX, 0x0000, 0xffff, UQ_KBD_IGNORE, UQ_HID_IGNORE), USB_QUIRK(TENX, UAUDIO0, 0x0101, 0x0101, UQ_AUDIO_SWAP_LR), /* MS keyboards do weird things */ USB_QUIRK(MICROSOFT, NATURAL4000, 0x0000, 0xFFFF, UQ_KBD_BOOTPROTO), USB_QUIRK(MICROSOFT, WLINTELLIMOUSE, 0x0000, 0xffff, UQ_MS_LEADING_BYTE), /* Quirk for Corsair Vengeance K60 keyboard */ USB_QUIRK(CORSAIR, K60, 0x0000, 0xffff, UQ_KBD_BOOTPROTO), /* Quirk for Corsair Vengeance K70 keyboard */ USB_QUIRK(CORSAIR, K70, 0x0000, 0xffff, UQ_KBD_BOOTPROTO), /* Quirk for Corsair STRAFE Gaming keyboard */ USB_QUIRK(CORSAIR, STRAFE, 0x0000, 0xffff, UQ_KBD_BOOTPROTO), /* umodem(4) device quirks */ USB_QUIRK(METRICOM, RICOCHET_GS, 0x100, 0x100, UQ_ASSUME_CM_OVER_DATA), USB_QUIRK(SANYO, SCP4900, 0x000, 0x000, UQ_ASSUME_CM_OVER_DATA), USB_QUIRK(MOTOROLA2, T720C, 0x001, 0x001, UQ_ASSUME_CM_OVER_DATA), USB_QUIRK(EICON, DIVA852, 0x100, 0x100, UQ_ASSUME_CM_OVER_DATA), USB_QUIRK(SIEMENS2, ES75, 0x000, 0x000, UQ_ASSUME_CM_OVER_DATA), USB_QUIRK(QUALCOMM, CDMA_MSM, 0x0000, 0xffff, UQ_ASSUME_CM_OVER_DATA), USB_QUIRK(QUALCOMM2, CDMA_MSM, 0x0000, 0xffff, UQ_ASSUME_CM_OVER_DATA), USB_QUIRK(CURITEL, UM150, 0x0000, 0xffff, UQ_ASSUME_CM_OVER_DATA), USB_QUIRK(CURITEL, UM175, 0x0000, 0xffff, UQ_ASSUME_CM_OVER_DATA), USB_QUIRK(VERTEX, VW110L, 0x0000, 0xffff, UQ_ASSUME_CM_OVER_DATA), /* USB Mass Storage Class Quirks */ USB_QUIRK_VP(USB_VENDOR_ASAHIOPTICAL, 0, UQ_MSC_NO_RS_CLEAR_UA, UQ_MATCH_VENDOR_ONLY), USB_QUIRK(ADDON, ATTACHE, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(ADDON, A256MB, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(ADDON, DISKPRO512, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(ADDONICS2, CABLE_205, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(AIPTEK, POCKETCAM3M, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(ALCOR, UMCR_9361, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(ALCOR, TRANSCEND, 0x0000, 0xffff, UQ_MSC_NO_GETMAXLUN, UQ_MSC_NO_SYNC_CACHE, UQ_MSC_NO_TEST_UNIT_READY), USB_QUIRK(APACER, HT202, 0x0000, 0xffff, UQ_MSC_NO_TEST_UNIT_READY, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(ASAHIOPTICAL, OPTIO230, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(ASAHIOPTICAL, OPTIO330, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(ATP, EUSB, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(BELKIN, USB2SCSI, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(CASIO, QV_DIGICAM, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(CCYU, ED1064, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(CENTURY, EX35QUAT, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_FORCE_SHORT_INQ, UQ_MSC_NO_START_STOP, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(CYPRESS, XX6830XX, 0x0000, 0xffff, UQ_MSC_NO_GETMAXLUN, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(DESKNOTE, UCR_61S2B, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(DMI, CFSM_RW, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(EMTEC, RUF2PS, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(EPSON, STYLUS_875DC, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(EPSON, STYLUS_895, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(FEIYA, 5IN1, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(FEIYA, ELANGO, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(FREECOM, DVD, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(FUJIPHOTO, MASS0100, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI_I, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_RS_CLEAR_UA, UQ_MSC_NO_SYNC_CACHE), + USB_QUIRK(GARMIN, FORERUNNER230, 0x0000, 0xffff, UQ_MSC_NO_INQUIRY), USB_QUIRK(GENESYS, GL641USB2IDE, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_FORCE_SHORT_INQ, UQ_MSC_NO_START_STOP, UQ_MSC_IGNORE_RESIDUE, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(GENESYS, GL641USB2IDE_2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_FORCE_SHORT_INQ, UQ_MSC_NO_START_STOP, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(GENESYS, GL641USB, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_FORCE_SHORT_INQ, UQ_MSC_NO_START_STOP, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(GENESYS, GL641USB_2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_WRONG_CSWSIG), USB_QUIRK(HAGIWARA, FG, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(HAGIWARA, FGSM, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(HITACHI, DVDCAM_DZ_MV100A, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(HITACHI, DVDCAM_USB, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI_I, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_INQUIRY), USB_QUIRK(HP, CDW4E, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_ATAPI), USB_QUIRK(HP, CDW8200, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI_I, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_TEST_UNIT_READY, UQ_MSC_NO_START_STOP), USB_QUIRK(IMAGINATION, DBX1, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_WRONG_CSWSIG), USB_QUIRK(INSYSTEM, USBCABLE, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_TEST_UNIT_READY, UQ_MSC_NO_START_STOP, UQ_MSC_ALT_IFACE_1), USB_QUIRK(INSYSTEM, ATAPI, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_RBC), USB_QUIRK(INSYSTEM, STORAGE_V2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_RBC), USB_QUIRK(IODATA, IU_CD2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(IODATA, DVR_UEH8, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(IOMEGA, ZIP100, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_TEST_UNIT_READY), /* XXX ZIP drives can also use ATAPI */ USB_QUIRK(JMICRON, JM20337, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(KINGSTON, HYPERX3_0, 0x0000, 0xffff, UQ_MSC_NO_INQUIRY), USB_QUIRK(KYOCERA, FINECAM_L3, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(KYOCERA, FINECAM_S3X, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_INQUIRY), USB_QUIRK(KYOCERA, FINECAM_S4, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_INQUIRY), USB_QUIRK(KYOCERA, FINECAM_S5, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(LACIE, HD, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_RBC), USB_QUIRK(LEXAR, CF_READER, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(LEXAR, JUMPSHOT, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(LEXAR, JUMPDRIVE, 0x0000, 0xffff, UQ_MSC_NO_INQUIRY), USB_QUIRK(LOGITEC, LDR_H443SU2, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(LOGITEC, LDR_H443U2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI,), USB_QUIRK(MELCO, DUBPXXG, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_FORCE_SHORT_INQ, UQ_MSC_NO_START_STOP, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(MICROTECH, DPCM, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_TEST_UNIT_READY, UQ_MSC_NO_START_STOP), USB_QUIRK(MICRON, REALSSD, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(MICROTECH, SCSIDB25, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(MICROTECH, SCSIHD50, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(MINOLTA, E223, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(MINOLTA, F300, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(MITSUMI, CDRRW, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI | UQ_MSC_FORCE_PROTO_ATAPI), USB_QUIRK(MOTOROLA2, E398, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_FORCE_SHORT_INQ, UQ_MSC_NO_INQUIRY_EVPD, UQ_MSC_NO_GETMAXLUN), USB_QUIRK_VP(USB_VENDOR_MPMAN, 0, UQ_MSC_NO_SYNC_CACHE, UQ_MATCH_VENDOR_ONLY), USB_QUIRK(MSYSTEMS, DISKONKEY, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE, UQ_MSC_NO_GETMAXLUN, UQ_MSC_NO_RS_CLEAR_UA), USB_QUIRK(MSYSTEMS, DISKONKEY2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_ATAPI), USB_QUIRK(MYSON, HEDEN, 0x0000, 0xffff, UQ_MSC_IGNORE_RESIDUE, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(NEODIO, ND3260, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_FORCE_SHORT_INQ), USB_QUIRK(NETAC, CF_CARD, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(NETAC, ONLYDISK, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(NETCHIP, CLIK_40, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_INQUIRY), USB_QUIRK(NETCHIP, POCKETBOOK, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(NIKON, D300, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(OLYMPUS, C1, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_WRONG_CSWSIG), USB_QUIRK(OLYMPUS, C700, 0x0000, 0xffff, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(ONSPEC, SDS_HOTFIND_D, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(ONSPEC, CFMS_RW, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(ONSPEC, CFSM_COMBO, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(ONSPEC, CFSM_READER, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(ONSPEC, CFSM_READER2, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(ONSPEC, MDCFE_B_CF_READER, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(ONSPEC, MDSM_B_READER, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(ONSPEC, READER, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(ONSPEC, UCF100, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_INQUIRY, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(ONSPEC2, IMAGEMATE_SDDR55, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(PANASONIC, KXL840AN, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(PANASONIC, KXLCB20AN, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(PANASONIC, KXLCB35AN, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(PANASONIC, LS120CAM, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_UFI), USB_QUIRK(PLEXTOR, 40_12_40U, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_TEST_UNIT_READY), USB_QUIRK(PNY, ATTACHE2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE, UQ_MSC_NO_START_STOP), USB_QUIRK(PROLIFIC, PL2506, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK_VP(USB_VENDOR_SAMSUNG_TECHWIN, USB_PRODUCT_SAMSUNG_TECHWIN_DIGIMAX_410, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(SANDISK, SDDR05A, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_READ_CAP_OFFBY1, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(SANDISK, SDDR09, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_READ_CAP_OFFBY1, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(SANDISK, SDDR12, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_READ_CAP_OFFBY1, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(SANDISK, SDCZ2_128, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(SANDISK, SDCZ2_256, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(SANDISK, SDCZ4_128, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(SANDISK, SDCZ4_256, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(SANDISK, SDDR31, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_READ_CAP_OFFBY1), USB_QUIRK(SANDISK, IMAGEMATE_SDDR289, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(SCANLOGIC, SL11R, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_INQUIRY), USB_QUIRK(SHUTTLE, EUSB, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI_I, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_NO_TEST_UNIT_READY, UQ_MSC_NO_START_STOP, UQ_MSC_SHUTTLE_INIT), USB_QUIRK(SHUTTLE, CDRW, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_ATAPI), USB_QUIRK(SHUTTLE, CF, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_ATAPI), USB_QUIRK(SHUTTLE, EUSBATAPI, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_ATAPI), USB_QUIRK(SHUTTLE, EUSBCFSM, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(SHUTTLE, EUSCSI, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(SHUTTLE, HIFD, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(SHUTTLE, SDDR09, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(SHUTTLE, ZIOMMC, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(SIGMATEL, I_BEAD100, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_SHUTTLE_INIT), USB_QUIRK(SIIG, WINTERREADER, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(SKANHEX, MD_7425, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(SKANHEX, SX_520Z, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(SONY, HANDYCAM, 0x0500, 0x0500, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_RBC, UQ_MSC_RBC_PAD_TO_12), USB_QUIRK(SONY, CLIE_40_MS, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(SONY, DSC, 0x0500, 0x0500, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_RBC, UQ_MSC_RBC_PAD_TO_12), USB_QUIRK(SONY, DSC, 0x0600, 0x0600, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_RBC, UQ_MSC_RBC_PAD_TO_12), USB_QUIRK(SONY, DSC, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_RBC), USB_QUIRK(SONY, HANDYCAM, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_RBC), USB_QUIRK(SONY, MSC, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_RBC), USB_QUIRK(SONY, MS_MSC_U03, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_UFI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(SONY, MS_NW_MS7, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(SONY, MS_PEG_N760C, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(SONY, MSACUS1, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(SONY, PORTABLE_HDD_V2, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(STMICRO, ST72682, 0x0000, 0xffff, UQ_MSC_NO_PREVENT_ALLOW), USB_QUIRK(SUPERTOP, IDE, 0x0000, 0xffff, UQ_MSC_IGNORE_RESIDUE, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(SUPERTOP, FLASHDRIVE, 0x0000, 0xffff, UQ_MSC_NO_TEST_UNIT_READY, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(TAUGA, CAMERAMATE, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(TEAC, FD05PUB, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_UFI), USB_QUIRK(TECLAST, TLC300, 0x0000, 0xffff, UQ_MSC_NO_TEST_UNIT_READY, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(TREK, MEMKEY, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(TREK, THUMBDRIVE_8MB, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(TRUMPION, C3310, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_UFI), USB_QUIRK(TRUMPION, MP3, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_RBC), USB_QUIRK(TRUMPION, T33520, 0x0000, 0xffff, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(TWINMOS, MDIV, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI), USB_QUIRK(VIA, USB2IDEBRIDGE, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(VIVITAR, 35XX, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(WESTERN, COMBO, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_FORCE_SHORT_INQ, UQ_MSC_NO_START_STOP, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(WESTERN, EXTHDD, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_FORCE_SHORT_INQ, UQ_MSC_NO_START_STOP, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(WESTERN, MYBOOK, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY_EVPD, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_00, 0x0000, 0xffff, UQ_MSC_FORCE_SHORT_INQ), USB_QUIRK(WESTERN, MYPASSPORT_01, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_02, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_03, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_04, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_05, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_06, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_07, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_08, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_09, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_10, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORT_11, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORTES_00, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORTES_01, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORTES_02, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORTES_03, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORTES_04, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORTES_05, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORTES_06, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORTES_07, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORTES_08, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WESTERN, MYPASSPORTES_09, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(WINMAXGROUP, FLASH64MC, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY), USB_QUIRK(YANO, FW800HD, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_FORCE_SHORT_INQ, UQ_MSC_NO_START_STOP, UQ_MSC_IGNORE_RESIDUE), USB_QUIRK(YANO, U640MO, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI_I, UQ_MSC_FORCE_PROTO_ATAPI, UQ_MSC_FORCE_SHORT_INQ), USB_QUIRK(YEDATA, FLASHBUSTERU, 0x0000, 0x007F, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_UFI, UQ_MSC_NO_RS_CLEAR_UA, UQ_MSC_FLOPPY_SPEED, UQ_MSC_NO_TEST_UNIT_READY, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(YEDATA, FLASHBUSTERU, 0x0080, 0x0080, UQ_MSC_FORCE_WIRE_CBI_I, UQ_MSC_FORCE_PROTO_UFI, UQ_MSC_NO_RS_CLEAR_UA, UQ_MSC_FLOPPY_SPEED, UQ_MSC_NO_TEST_UNIT_READY, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(YEDATA, FLASHBUSTERU, 0x0081, 0xFFFF, UQ_MSC_FORCE_WIRE_CBI_I, UQ_MSC_FORCE_PROTO_UFI, UQ_MSC_NO_RS_CLEAR_UA, UQ_MSC_FLOPPY_SPEED, UQ_MSC_NO_GETMAXLUN), USB_QUIRK(ZORAN, EX20DSC, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_CBI, UQ_MSC_FORCE_PROTO_ATAPI), USB_QUIRK(MEIZU, M6_SL, 0x0000, 0xffff, UQ_MSC_FORCE_WIRE_BBB, UQ_MSC_FORCE_PROTO_SCSI, UQ_MSC_NO_INQUIRY, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(TOSHIBA, TRANSMEMORY, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE, UQ_MSC_NO_PREVENT_ALLOW), USB_QUIRK(VIALABS, USB30SATABRIDGE, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE), USB_QUIRK(QUALCOMMINC, ZTE_MF730M, 0x0000, 0xffff, UQ_MSC_NO_GETMAXLUN, UQ_MSC_NO_INQUIRY, UQ_CFG_INDEX_0), /* Non-standard USB MIDI devices */ USB_QUIRK(ROLAND, UM1, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, SC8850, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, SD90, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, UM880N, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, UA100, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, UM4, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, U8, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, UM2, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, SC8820, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, PC300, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, SK500, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, SCD70, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, UM550, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, SD20, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, SD80, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(ROLAND, UA700, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(EGO, M4U, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI), USB_QUIRK(LOGILINK, U2M, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI), USB_QUIRK(MEDELI, DD305, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI, UQ_MATCH_VENDOR_ONLY), USB_QUIRK(REDOCTANE, GHMIDI, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI), USB_QUIRK(TEXTECH, U2M_1, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI), USB_QUIRK(TEXTECH, U2M_2, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI), USB_QUIRK(WCH2, U2M, 0x0000, 0xffff, UQ_SINGLE_CMD_MIDI), /* Non-standard USB AUDIO devices */ USB_QUIRK(MAUDIO, FASTTRACKULTRA, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), USB_QUIRK(MAUDIO, FASTTRACKULTRA8R, 0x0000, 0xffff, UQ_AU_VENDOR_CLASS), /* * Quirks for manufacturers which USB devices does not respond * after issuing non-supported commands: */ USB_QUIRK(ALCOR, DUMMY, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE, UQ_MSC_NO_TEST_UNIT_READY, UQ_MATCH_VENDOR_ONLY), USB_QUIRK(APPLE, DUMMY, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE, UQ_MATCH_VENDOR_ONLY), USB_QUIRK(FEIYA, DUMMY, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE, UQ_MATCH_VENDOR_ONLY), USB_QUIRK(REALTEK, DUMMY, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE, UQ_MATCH_VENDOR_ONLY), USB_QUIRK(INITIO, DUMMY, 0x0000, 0xffff, UQ_MSC_NO_SYNC_CACHE, UQ_MATCH_VENDOR_ONLY), /* DYMO LabelManager Pnp */ USB_QUIRK(DYMO, LABELMANAGERPNP, 0x0000, 0xffff, UQ_MSC_DYMO_EJECT), /* Holtek USB gaming keyboard */ USB_QUIRK(HOLTEK, F85, 0x0000, 0xffff, UQ_KBD_BOOTPROTO), }; #undef USB_QUIRK_VP #undef USB_QUIRK static const char *usb_quirk_str[USB_QUIRK_MAX] = { [UQ_NONE] = "UQ_NONE", [UQ_MATCH_VENDOR_ONLY] = "UQ_MATCH_VENDOR_ONLY", [UQ_AUDIO_SWAP_LR] = "UQ_AUDIO_SWAP_LR", [UQ_AU_INP_ASYNC] = "UQ_AU_INP_ASYNC", [UQ_AU_NO_FRAC] = "UQ_AU_NO_FRAC", [UQ_AU_NO_XU] = "UQ_AU_NO_XU", [UQ_BAD_ADC] = "UQ_BAD_ADC", [UQ_BAD_AUDIO] = "UQ_BAD_AUDIO", [UQ_BROKEN_BIDIR] = "UQ_BROKEN_BIDIR", [UQ_BUS_POWERED] = "UQ_BUS_POWERED", [UQ_HID_IGNORE] = "UQ_HID_IGNORE", [UQ_KBD_IGNORE] = "UQ_KBD_IGNORE", [UQ_KBD_BOOTPROTO] = "UQ_KBD_BOOTPROTO", [UQ_UMS_IGNORE] = "UQ_UMS_IGNORE", [UQ_MS_BAD_CLASS] = "UQ_MS_BAD_CLASS", [UQ_MS_LEADING_BYTE] = "UQ_MS_LEADING_BYTE", [UQ_MS_REVZ] = "UQ_MS_REVZ", [UQ_NO_STRINGS] = "UQ_NO_STRINGS", [UQ_POWER_CLAIM] = "UQ_POWER_CLAIM", [UQ_SPUR_BUT_UP] = "UQ_SPUR_BUT_UP", [UQ_SWAP_UNICODE] = "UQ_SWAP_UNICODE", [UQ_CFG_INDEX_1] = "UQ_CFG_INDEX_1", [UQ_CFG_INDEX_2] = "UQ_CFG_INDEX_2", [UQ_CFG_INDEX_3] = "UQ_CFG_INDEX_3", [UQ_CFG_INDEX_4] = "UQ_CFG_INDEX_4", [UQ_CFG_INDEX_0] = "UQ_CFG_INDEX_0", [UQ_ASSUME_CM_OVER_DATA] = "UQ_ASSUME_CM_OVER_DATA", [UQ_MSC_NO_TEST_UNIT_READY] = "UQ_MSC_NO_TEST_UNIT_READY", [UQ_MSC_NO_RS_CLEAR_UA] = "UQ_MSC_NO_RS_CLEAR_UA", [UQ_MSC_NO_START_STOP] = "UQ_MSC_NO_START_STOP", [UQ_MSC_NO_GETMAXLUN] = "UQ_MSC_NO_GETMAXLUN", [UQ_MSC_NO_INQUIRY] = "UQ_MSC_NO_INQUIRY", [UQ_MSC_NO_INQUIRY_EVPD] = "UQ_MSC_NO_INQUIRY_EVPD", [UQ_MSC_NO_PREVENT_ALLOW] = "UQ_MSC_NO_PREVENT_ALLOW", [UQ_MSC_NO_SYNC_CACHE] = "UQ_MSC_NO_SYNC_CACHE", [UQ_MSC_SHUTTLE_INIT] = "UQ_MSC_SHUTTLE_INIT", [UQ_MSC_ALT_IFACE_1] = "UQ_MSC_ALT_IFACE_1", [UQ_MSC_FLOPPY_SPEED] = "UQ_MSC_FLOPPY_SPEED", [UQ_MSC_IGNORE_RESIDUE] = "UQ_MSC_IGNORE_RESIDUE", [UQ_MSC_WRONG_CSWSIG] = "UQ_MSC_WRONG_CSWSIG", [UQ_MSC_RBC_PAD_TO_12] = "UQ_MSC_RBC_PAD_TO_12", [UQ_MSC_READ_CAP_OFFBY1] = "UQ_MSC_READ_CAP_OFFBY1", [UQ_MSC_FORCE_SHORT_INQ] = "UQ_MSC_FORCE_SHORT_INQ", [UQ_MSC_FORCE_WIRE_BBB] = "UQ_MSC_FORCE_WIRE_BBB", [UQ_MSC_FORCE_WIRE_CBI] = "UQ_MSC_FORCE_WIRE_CBI", [UQ_MSC_FORCE_WIRE_CBI_I] = "UQ_MSC_FORCE_WIRE_CBI_I", [UQ_MSC_FORCE_PROTO_SCSI] = "UQ_MSC_FORCE_PROTO_SCSI", [UQ_MSC_FORCE_PROTO_ATAPI] = "UQ_MSC_FORCE_PROTO_ATAPI", [UQ_MSC_FORCE_PROTO_UFI] = "UQ_MSC_FORCE_PROTO_UFI", [UQ_MSC_FORCE_PROTO_RBC] = "UQ_MSC_FORCE_PROTO_RBC", [UQ_MSC_EJECT_HUAWEI] = "UQ_MSC_EJECT_HUAWEI", [UQ_MSC_EJECT_SIERRA] = "UQ_MSC_EJECT_SIERRA", [UQ_MSC_EJECT_SCSIEJECT] = "UQ_MSC_EJECT_SCSIEJECT", [UQ_MSC_EJECT_REZERO] = "UQ_MSC_EJECT_REZERO", [UQ_MSC_EJECT_ZTESTOR] = "UQ_MSC_EJECT_ZTESTOR", [UQ_MSC_EJECT_CMOTECH] = "UQ_MSC_EJECT_CMOTECH", [UQ_MSC_EJECT_WAIT] = "UQ_MSC_EJECT_WAIT", [UQ_MSC_EJECT_SAEL_M460] = "UQ_MSC_EJECT_SAEL_M460", [UQ_MSC_EJECT_HUAWEISCSI] = "UQ_MSC_EJECT_HUAWEISCSI", [UQ_MSC_EJECT_HUAWEISCSI2] = "UQ_MSC_EJECT_HUAWEISCSI2", [UQ_MSC_EJECT_TCT] = "UQ_MSC_EJECT_TCT", [UQ_BAD_MIDI] = "UQ_BAD_MIDI", [UQ_AU_VENDOR_CLASS] = "UQ_AU_VENDOR_CLASS", [UQ_SINGLE_CMD_MIDI] = "UQ_SINGLE_CMD_MIDI", [UQ_MSC_DYMO_EJECT] = "UQ_MSC_DYMO_EJECT", }; /*------------------------------------------------------------------------* * usb_quirkstr * * This function converts an USB quirk code into a string. *------------------------------------------------------------------------*/ static const char * usb_quirkstr(uint16_t quirk) { return ((quirk < USB_QUIRK_MAX && usb_quirk_str[quirk] != NULL) ? usb_quirk_str[quirk] : "UQ_UNKNOWN"); } /*------------------------------------------------------------------------* * usb_strquirk * * This function converts a string into a USB quirk code. * * Returns: * Less than USB_QUIRK_MAX: Quirk code * Else: Quirk code not found *------------------------------------------------------------------------*/ static uint16_t usb_strquirk(const char *str, size_t len) { const char *quirk; uint16_t x; for (x = 0; x != USB_QUIRK_MAX; x++) { quirk = usb_quirkstr(x); if (strncmp(str, quirk, len) == 0 && quirk[len] == 0) break; } return (x); } /*------------------------------------------------------------------------* * usb_test_quirk_by_info * * Returns: * 0: Quirk not found * Else: Quirk found *------------------------------------------------------------------------*/ static uint8_t usb_test_quirk_by_info(const struct usbd_lookup_info *info, uint16_t quirk) { uint16_t x; uint16_t y; if (quirk == UQ_NONE) goto done; mtx_lock(&usb_quirk_mtx); for (x = 0; x != USB_DEV_QUIRKS_MAX; x++) { /* see if quirk information does not match */ if ((usb_quirks[x].vid != info->idVendor) || (usb_quirks[x].lo_rev > info->bcdDevice) || (usb_quirks[x].hi_rev < info->bcdDevice)) { continue; } /* see if quirk only should match vendor ID */ if (usb_quirks[x].pid != info->idProduct) { if (usb_quirks[x].pid != 0) continue; for (y = 0; y != USB_SUB_QUIRKS_MAX; y++) { if (usb_quirks[x].quirks[y] == UQ_MATCH_VENDOR_ONLY) break; } if (y == USB_SUB_QUIRKS_MAX) continue; } /* lookup quirk */ for (y = 0; y != USB_SUB_QUIRKS_MAX; y++) { if (usb_quirks[x].quirks[y] == quirk) { mtx_unlock(&usb_quirk_mtx); DPRINTF("Found quirk '%s'.\n", usb_quirkstr(quirk)); return (1); } } } mtx_unlock(&usb_quirk_mtx); done: return (0); /* no quirk match */ } static struct usb_quirk_entry * usb_quirk_get_entry(uint16_t vid, uint16_t pid, uint16_t lo_rev, uint16_t hi_rev, uint8_t do_alloc) { uint16_t x; mtx_assert(&usb_quirk_mtx, MA_OWNED); if ((vid | pid | lo_rev | hi_rev) == 0) { /* all zero - special case */ return (usb_quirks + USB_DEV_QUIRKS_MAX - 1); } /* search for an existing entry */ for (x = 0; x != USB_DEV_QUIRKS_MAX; x++) { /* see if quirk information does not match */ if ((usb_quirks[x].vid != vid) || (usb_quirks[x].pid != pid) || (usb_quirks[x].lo_rev != lo_rev) || (usb_quirks[x].hi_rev != hi_rev)) { continue; } return (usb_quirks + x); } if (do_alloc == 0) { /* no match */ return (NULL); } /* search for a free entry */ for (x = 0; x != USB_DEV_QUIRKS_MAX; x++) { /* see if quirk information does not match */ if ((usb_quirks[x].vid | usb_quirks[x].pid | usb_quirks[x].lo_rev | usb_quirks[x].hi_rev) != 0) { continue; } usb_quirks[x].vid = vid; usb_quirks[x].pid = pid; usb_quirks[x].lo_rev = lo_rev; usb_quirks[x].hi_rev = hi_rev; return (usb_quirks + x); } /* no entry found */ return (NULL); } /*------------------------------------------------------------------------* * usb_quirk_ioctl - handle quirk IOCTLs * * Returns: * 0: Success * Else: Failure *------------------------------------------------------------------------*/ static int usb_quirk_ioctl(unsigned long cmd, caddr_t data, int fflag, struct thread *td) { struct usb_gen_quirk *pgq; struct usb_quirk_entry *pqe; uint32_t x; uint32_t y; int err; switch (cmd) { case USB_DEV_QUIRK_GET: pgq = (void *)data; x = pgq->index % USB_SUB_QUIRKS_MAX; y = pgq->index / USB_SUB_QUIRKS_MAX; if (y >= USB_DEV_QUIRKS_MAX) { return (EINVAL); } mtx_lock(&usb_quirk_mtx); /* copy out data */ pgq->vid = usb_quirks[y].vid; pgq->pid = usb_quirks[y].pid; pgq->bcdDeviceLow = usb_quirks[y].lo_rev; pgq->bcdDeviceHigh = usb_quirks[y].hi_rev; strlcpy(pgq->quirkname, usb_quirkstr(usb_quirks[y].quirks[x]), sizeof(pgq->quirkname)); mtx_unlock(&usb_quirk_mtx); return (0); /* success */ case USB_QUIRK_NAME_GET: pgq = (void *)data; x = pgq->index; if (x >= USB_QUIRK_MAX) { return (EINVAL); } strlcpy(pgq->quirkname, usb_quirkstr(x), sizeof(pgq->quirkname)); return (0); /* success */ case USB_DEV_QUIRK_ADD: pgq = (void *)data; /* check privileges */ err = priv_check(curthread, PRIV_DRIVER); if (err) { return (err); } /* convert quirk string into numerical */ for (y = 0; y != USB_DEV_QUIRKS_MAX; y++) { if (strcmp(pgq->quirkname, usb_quirkstr(y)) == 0) { break; } } if (y == USB_DEV_QUIRKS_MAX) { return (EINVAL); } if (y == UQ_NONE) { return (EINVAL); } mtx_lock(&usb_quirk_mtx); pqe = usb_quirk_get_entry(pgq->vid, pgq->pid, pgq->bcdDeviceLow, pgq->bcdDeviceHigh, 1); if (pqe == NULL) { mtx_unlock(&usb_quirk_mtx); return (EINVAL); } for (x = 0; x != USB_SUB_QUIRKS_MAX; x++) { if (pqe->quirks[x] == UQ_NONE) { pqe->quirks[x] = y; break; } } mtx_unlock(&usb_quirk_mtx); if (x == USB_SUB_QUIRKS_MAX) { return (ENOMEM); } return (0); /* success */ case USB_DEV_QUIRK_REMOVE: pgq = (void *)data; /* check privileges */ err = priv_check(curthread, PRIV_DRIVER); if (err) { return (err); } /* convert quirk string into numerical */ for (y = 0; y != USB_DEV_QUIRKS_MAX; y++) { if (strcmp(pgq->quirkname, usb_quirkstr(y)) == 0) { break; } } if (y == USB_DEV_QUIRKS_MAX) { return (EINVAL); } if (y == UQ_NONE) { return (EINVAL); } mtx_lock(&usb_quirk_mtx); pqe = usb_quirk_get_entry(pgq->vid, pgq->pid, pgq->bcdDeviceLow, pgq->bcdDeviceHigh, 0); if (pqe == NULL) { mtx_unlock(&usb_quirk_mtx); return (EINVAL); } for (x = 0; x != USB_SUB_QUIRKS_MAX; x++) { if (pqe->quirks[x] == y) { pqe->quirks[x] = UQ_NONE; break; } } if (x == USB_SUB_QUIRKS_MAX) { mtx_unlock(&usb_quirk_mtx); return (ENOMEM); } for (x = 0; x != USB_SUB_QUIRKS_MAX; x++) { if (pqe->quirks[x] != UQ_NONE) { break; } } if (x == USB_SUB_QUIRKS_MAX) { /* all quirk entries are unused - release */ memset(pqe, 0, sizeof(*pqe)); } mtx_unlock(&usb_quirk_mtx); return (0); /* success */ default: break; } return (ENOIOCTL); } /*------------------------------------------------------------------------* * usb_quirk_strtou16 * * Helper function to scan a 16-bit integer. *------------------------------------------------------------------------*/ static uint16_t usb_quirk_strtou16(const char **pptr, const char *name, const char *what) { unsigned long value; char *end; value = strtoul(*pptr, &end, 0); if (value > 65535 || *pptr == end || (*end != ' ' && *end != '\t')) { printf("%s: %s 16-bit %s value set to zero\n", name, what, *end == 0 ? "incomplete" : "invalid"); return (0); } *pptr = end + 1; return ((uint16_t)value); } /*------------------------------------------------------------------------* * usb_quirk_add_entry_from_str * * Add a USB quirk entry from string. * "VENDOR PRODUCT LO_REV HI_REV QUIRK[,QUIRK[,...]]" *------------------------------------------------------------------------*/ static void usb_quirk_add_entry_from_str(const char *name, const char *env) { struct usb_quirk_entry entry = { }; struct usb_quirk_entry *new; uint16_t quirk_idx; uint16_t quirk; const char *end; /* check for invalid environment variable */ if (name == NULL || env == NULL) return; if (bootverbose) printf("Adding USB QUIRK '%s' = '%s'\n", name, env); /* parse device information */ entry.vid = usb_quirk_strtou16(&env, name, "Vendor ID"); entry.pid = usb_quirk_strtou16(&env, name, "Product ID"); entry.lo_rev = usb_quirk_strtou16(&env, name, "Low revision"); entry.hi_rev = usb_quirk_strtou16(&env, name, "High revision"); /* parse quirk information */ quirk_idx = 0; while (*env != 0 && quirk_idx != USB_SUB_QUIRKS_MAX) { /* skip whitespace before quirks */ while (*env == ' ' || *env == '\t') env++; /* look for quirk separation character */ end = strchr(env, ','); if (end == NULL) end = env + strlen(env); /* lookup quirk in string table */ quirk = usb_strquirk(env, end - env); if (quirk < USB_QUIRK_MAX) { entry.quirks[quirk_idx++] = quirk; } else { printf("%s: unknown USB quirk '%.*s' (skipped)\n", name, (int)(end - env), env); } env = end; /* skip quirk delimiter, if any */ if (*env != 0) env++; } /* register quirk */ if (quirk_idx != 0) { if (*env != 0) { printf("%s: Too many USB quirks, only %d allowed!\n", name, USB_SUB_QUIRKS_MAX); } mtx_lock(&usb_quirk_mtx); new = usb_quirk_get_entry(entry.vid, entry.pid, entry.lo_rev, entry.hi_rev, 1); if (new == NULL) printf("%s: USB quirks table is full!\n", name); else memcpy(new->quirks, entry.quirks, sizeof(entry.quirks)); mtx_unlock(&usb_quirk_mtx); } else { printf("%s: No USB quirks found!\n", name); } } static void usb_quirk_init(void *arg) { char envkey[sizeof(USB_QUIRK_ENVROOT) + 2]; /* 2 digits max, 0 to 99 */ int i; /* initialize mutex */ mtx_init(&usb_quirk_mtx, "USB quirk", NULL, MTX_DEF); /* look for quirks defined by the environment variable */ for (i = 0; i != 100; i++) { snprintf(envkey, sizeof(envkey), USB_QUIRK_ENVROOT "%d", i); /* Stop at first undefined var */ if (!testenv(envkey)) break; /* parse environment variable */ usb_quirk_add_entry_from_str(envkey, kern_getenv(envkey)); } /* register our function */ usb_test_quirk_p = &usb_test_quirk_by_info; usb_quirk_ioctl_p = &usb_quirk_ioctl; } static void usb_quirk_uninit(void *arg) { usb_quirk_unload(arg); /* destroy mutex */ mtx_destroy(&usb_quirk_mtx); } SYSINIT(usb_quirk_init, SI_SUB_LOCK, SI_ORDER_FIRST, usb_quirk_init, NULL); SYSUNINIT(usb_quirk_uninit, SI_SUB_LOCK, SI_ORDER_ANY, usb_quirk_uninit, NULL); Index: projects/vnet/sys/dev/usb/usbdevs =================================================================== --- projects/vnet/sys/dev/usb/usbdevs (revision 302276) +++ projects/vnet/sys/dev/usb/usbdevs (revision 302277) @@ -1,4706 +1,4707 @@ $FreeBSD$ /* $NetBSD: usbdevs,v 1.392 2004/12/29 08:38:44 imp Exp $ */ /*- * Copyright (c) 1998-2004 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Lennart Augustsson (lennart@augustsson.net) at * Carlstedt Research & Technology. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * List of known USB vendors * * USB.org publishes a VID list of USB-IF member companies at * http://www.usb.org/developers/tools * Note that it does not show companies that have obtained a Vendor ID * without becoming full members. * * Please note that these IDs do not do anything. Adding an ID here and * regenerating the usbdevs.h and usbdevs_data.h only makes a symbolic name * available to the source code and does not change any functionality, nor * does it make your device available to a specific driver. * It will however make the descriptive string available if a device does not * provide the string itself. * * After adding a vendor ID VNDR and a product ID PRDCT you will have the * following extra defines: * #define USB_VENDOR_VNDR 0x???? * #define USB_PRODUCT_VNDR_PRDCT 0x???? * * You may have to add these defines to the respective probe routines to * make the device recognised by the appropriate device driver. */ vendor UNKNOWN1 0x0053 Unknown vendor vendor UNKNOWN2 0x0105 Unknown vendor vendor EGALAX2 0x0123 eGalax, Inc. vendor CHIPSBANK 0x0204 Chipsbank Microelectronics Co. vendor HUMAX 0x02ad HUMAX vendor LTS 0x0386 LTS vendor BWCT 0x03da Bernd Walter Computer Technology vendor AOX 0x03e8 AOX vendor THESYS 0x03e9 Thesys vendor DATABROADCAST 0x03ea Data Broadcasting vendor ATMEL 0x03eb Atmel vendor IWATSU 0x03ec Iwatsu America vendor MITSUMI 0x03ee Mitsumi vendor HP 0x03f0 Hewlett Packard vendor GENOA 0x03f1 Genoa vendor OAK 0x03f2 Oak vendor ADAPTEC 0x03f3 Adaptec vendor DIEBOLD 0x03f4 Diebold vendor SIEMENSELECTRO 0x03f5 Siemens Electromechanical vendor EPSONIMAGING 0x03f8 Epson Imaging vendor KEYTRONIC 0x03f9 KeyTronic vendor OPTI 0x03fb OPTi vendor ELITEGROUP 0x03fc Elitegroup vendor XILINX 0x03fd Xilinx vendor FARALLON 0x03fe Farallon Communications vendor NATIONAL 0x0400 National Semiconductor vendor NATIONALREG 0x0401 National Registry vendor ACERLABS 0x0402 Acer Labs vendor FTDI 0x0403 Future Technology Devices vendor NCR 0x0404 NCR vendor SYNOPSYS2 0x0405 Synopsys vendor FUJITSUICL 0x0406 Fujitsu-ICL vendor FUJITSU2 0x0407 Fujitsu Personal Systems vendor QUANTA 0x0408 Quanta vendor NEC 0x0409 NEC vendor KODAK 0x040a Eastman Kodak vendor WELTREND 0x040b Weltrend vendor VIA 0x040d VIA vendor MCCI 0x040e MCCI vendor MELCO 0x0411 Melco vendor LEADTEK 0x0413 Leadtek vendor WINBOND 0x0416 Winbond vendor PHOENIX 0x041a Phoenix vendor CREATIVE 0x041e Creative Labs vendor NOKIA 0x0421 Nokia vendor ADI 0x0422 ADI Systems vendor CATC 0x0423 Computer Access Technology vendor SMC2 0x0424 Standard Microsystems vendor MOTOROLA_HK 0x0425 Motorola HK vendor GRAVIS 0x0428 Advanced Gravis Computer vendor CIRRUSLOGIC 0x0429 Cirrus Logic vendor INNOVATIVE 0x042c Innovative Semiconductors vendor MOLEX 0x042f Molex vendor SUN 0x0430 Sun Microsystems vendor UNISYS 0x0432 Unisys vendor TAUGA 0x0436 Taugagreining HF vendor AMD 0x0438 Advanced Micro Devices vendor LEXMARK 0x043d Lexmark International vendor LG 0x043e LG Electronics vendor NANAO 0x0440 NANAO vendor GATEWAY 0x0443 Gateway 2000 vendor NMB 0x0446 NMB vendor ALPS 0x044e Alps Electric vendor THRUST 0x044f Thrustmaster vendor TI 0x0451 Texas Instruments vendor ANALOGDEVICES 0x0456 Analog Devices vendor SIS 0x0457 Silicon Integrated Systems Corp. vendor KYE 0x0458 KYE Systems vendor DIAMOND2 0x045a Diamond (Supra) vendor RENESAS 0x045b Renesas vendor MICROSOFT 0x045e Microsoft vendor PRIMAX 0x0461 Primax Electronics vendor MGE 0x0463 MGE UPS Systems vendor AMP 0x0464 AMP vendor CHERRY 0x046a Cherry Mikroschalter vendor MEGATRENDS 0x046b American Megatrends vendor LOGITECH 0x046d Logitech vendor BTC 0x046e Behavior Tech. Computer vendor PHILIPS 0x0471 Philips vendor SUN2 0x0472 Sun Microsystems (official) vendor SANYO 0x0474 Sanyo Electric vendor SEAGATE 0x0477 Seagate vendor CONNECTIX 0x0478 Connectix vendor SEMTECH 0x047a Semtech vendor KENSINGTON 0x047d Kensington vendor LUCENT 0x047e Lucent vendor PLANTRONICS 0x047f Plantronics vendor KYOCERA 0x0482 Kyocera Wireless Corp. vendor STMICRO 0x0483 STMicroelectronics vendor FOXCONN 0x0489 Foxconn vendor MEIZU 0x0492 Meizu Electronics vendor YAMAHA 0x0499 YAMAHA vendor COMPAQ 0x049f Compaq vendor HITACHI 0x04a4 Hitachi vendor ACERP 0x04a5 Acer Peripherals vendor DAVICOM 0x04a6 Davicom vendor VISIONEER 0x04a7 Visioneer vendor CANON 0x04a9 Canon vendor NIKON 0x04b0 Nikon vendor PAN 0x04b1 Pan International vendor IBM 0x04b3 IBM vendor CYPRESS 0x04b4 Cypress Semiconductor vendor ROHM 0x04b5 ROHM vendor COMPAL 0x04b7 Compal vendor EPSON 0x04b8 Seiko Epson vendor RAINBOW 0x04b9 Rainbow Technologies vendor IODATA 0x04bb I-O Data vendor TDK 0x04bf TDK vendor 3COMUSR 0x04c1 U.S. Robotics vendor METHODE 0x04c2 Methode Electronics Far East vendor MAXISWITCH 0x04c3 Maxi Switch vendor LOCKHEEDMER 0x04c4 Lockheed Martin Energy Research vendor FUJITSU 0x04c5 Fujitsu vendor TOSHIBAAM 0x04c6 Toshiba America vendor MICROMACRO 0x04c7 Micro Macro Technologies vendor KONICA 0x04c8 Konica vendor LITEON 0x04ca Lite-On Technology vendor FUJIPHOTO 0x04cb Fuji Photo Film vendor PHILIPSSEMI 0x04cc Philips Semiconductors vendor TATUNG 0x04cd Tatung Co. Of America vendor SCANLOGIC 0x04ce ScanLogic vendor MYSON 0x04cf Myson Technology vendor DIGI2 0x04d0 Digi vendor ITTCANON 0x04d1 ITT Canon vendor ALTEC 0x04d2 Altec Lansing vendor LSI 0x04d4 LSI vendor MENTORGRAPHICS 0x04d6 Mentor Graphics vendor ITUNERNET 0x04d8 I-Tuner Networks vendor HOLTEK 0x04d9 Holtek Semiconductor, Inc. vendor PANASONIC 0x04da Panasonic (Matsushita) vendor HUANHSIN 0x04dc Huan Hsin vendor SHARP 0x04dd Sharp vendor IIYAMA 0x04e1 Iiyama vendor SHUTTLE 0x04e6 Shuttle Technology vendor ELO 0x04e7 Elo TouchSystems vendor SAMSUNG 0x04e8 Samsung Electronics vendor NORTHSTAR 0x04eb Northstar vendor TOKYOELECTRON 0x04ec Tokyo Electron vendor ANNABOOKS 0x04ed Annabooks vendor JVC 0x04f1 JVC vendor CHICONY 0x04f2 Chicony Electronics vendor ELAN 0x04f3 Elan vendor NEWNEX 0x04f7 Newnex vendor BROTHER 0x04f9 Brother Industries vendor DALLAS 0x04fa Dallas Semiconductor vendor AIPTEK2 0x04fc AIPTEK International vendor PFU 0x04fe PFU vendor FUJIKURA 0x0501 Fujikura/DDK vendor ACER 0x0502 Acer vendor 3COM 0x0506 3Com vendor HOSIDEN 0x0507 Hosiden Corporation vendor AZTECH 0x0509 Aztech Systems vendor BELKIN 0x050d Belkin Components vendor KAWATSU 0x050f Kawatsu Semiconductor vendor FCI 0x0514 FCI vendor LONGWELL 0x0516 Longwell vendor COMPOSITE 0x0518 Composite vendor STAR 0x0519 Star Micronics vendor APC 0x051d American Power Conversion vendor SCIATLANTA 0x051e Scientific Atlanta vendor TSM 0x0520 TSM vendor CONNECTEK 0x0522 Advanced Connectek USA vendor NETCHIP 0x0525 NetChip Technology vendor ALTRA 0x0527 ALTRA vendor ATI 0x0528 ATI Technologies vendor AKS 0x0529 Aladdin Knowledge Systems vendor TEKOM 0x052b Tekom vendor CANONDEV 0x052c Canon vendor WACOMTECH 0x0531 Wacom vendor INVENTEC 0x0537 Inventec vendor SHYHSHIUN 0x0539 Shyh Shiun Terminals vendor PREHWERKE 0x053a Preh Werke Gmbh & Co. KG vendor SYNOPSYS 0x053f Synopsys vendor UNIACCESS 0x0540 Universal Access vendor VIEWSONIC 0x0543 ViewSonic vendor XIRLINK 0x0545 Xirlink vendor ANCHOR 0x0547 Anchor Chips vendor SONY 0x054c Sony vendor FUJIXEROX 0x0550 Fuji Xerox vendor VISION 0x0553 VLSI Vision vendor ASAHIKASEI 0x0556 Asahi Kasei Microsystems vendor ATEN 0x0557 ATEN International vendor SAMSUNG2 0x055d Samsung Electronics vendor MUSTEK 0x055f Mustek Systems vendor TELEX 0x0562 Telex Communications vendor CHINON 0x0564 Chinon vendor PERACOM 0x0565 Peracom Networks vendor ALCOR2 0x0566 Alcor Micro vendor XYRATEX 0x0567 Xyratex vendor WACOM 0x056a WACOM vendor ETEK 0x056c e-TEK Labs vendor EIZO 0x056d EIZO vendor ELECOM 0x056e Elecom vendor CONEXANT 0x0572 Conexant vendor HAUPPAUGE 0x0573 Hauppauge Computer Works vendor BAFO 0x0576 BAFO/Quality Computer Accessories vendor YEDATA 0x057b Y-E Data vendor AVM 0x057c AVM vendor QUICKSHOT 0x057f Quickshot vendor ROLAND 0x0582 Roland vendor ROCKFIRE 0x0583 Rockfire vendor RATOC 0x0584 RATOC Systems vendor ZYXEL 0x0586 ZyXEL Communication vendor INFINEON 0x058b Infineon vendor MICREL 0x058d Micrel vendor ALCOR 0x058f Alcor Micro vendor OMRON 0x0590 OMRON vendor ZORAN 0x0595 Zoran Microelectronics vendor NIIGATA 0x0598 Niigata vendor IOMEGA 0x059b Iomega vendor ATREND 0x059c A-Trend Technology vendor AID 0x059d Advanced Input Devices vendor LACIE 0x059f LaCie vendor FUJIFILM 0x05a2 Fuji Film vendor ARC 0x05a3 ARC vendor ORTEK 0x05a4 Ortek vendor CISCOLINKSYS3 0x05a6 Cisco-Linksys vendor BOSE 0x05a7 Bose vendor OMNIVISION 0x05a9 OmniVision vendor INSYSTEM 0x05ab In-System Design vendor APPLE 0x05ac Apple Computer vendor YCCABLE 0x05ad Y.C. Cable vendor DIGITALPERSONA 0x05ba DigitalPersona vendor 3G 0x05bc 3G Green Green Globe vendor RAFI 0x05bd RAFI vendor TYCO 0x05be Tyco vendor KAWASAKI 0x05c1 Kawasaki vendor DIGI 0x05c5 Digi International vendor QUALCOMM2 0x05c6 Qualcomm vendor QTRONIX 0x05c7 Qtronix vendor FOXLINK 0x05c8 Foxlink vendor RICOH 0x05ca Ricoh vendor ELSA 0x05cc ELSA vendor SCIWORX 0x05ce sci-worx vendor BRAINBOXES 0x05d1 Brainboxes Limited vendor ULTIMA 0x05d8 Ultima vendor AXIOHM 0x05d9 Axiohm Transaction Solutions vendor MICROTEK 0x05da Microtek vendor SUNTAC 0x05db SUN Corporation vendor LEXAR 0x05dc Lexar Media vendor ADDTRON 0x05dd Addtron vendor SYMBOL 0x05e0 Symbol Technologies vendor SYNTEK 0x05e1 Syntek vendor GENESYS 0x05e3 Genesys Logic vendor FUJI 0x05e5 Fuji Electric vendor KEITHLEY 0x05e6 Keithley Instruments vendor EIZONANAO 0x05e7 EIZO Nanao vendor KLSI 0x05e9 Kawasaki LSI vendor FFC 0x05eb FFC vendor ANKO 0x05ef Anko Electronic vendor PIENGINEERING 0x05f3 P.I. Engineering vendor AOC 0x05f6 AOC International vendor CHIC 0x05fe Chic Technology vendor BARCO 0x0600 Barco Display Systems vendor BRIDGE 0x0607 Bridge Information vendor SOLIDYEAR 0x060b Solid Year vendor BIORAD 0x0614 Bio-Rad Laboratories vendor MACALLY 0x0618 Macally vendor ACTLABS 0x061c Act Labs vendor ALARIS 0x0620 Alaris vendor APEX 0x0624 Apex vendor CREATIVE3 0x062a Creative Labs vendor MICRON 0x0634 Micron Technology vendor VIVITAR 0x0636 Vivitar vendor GUNZE 0x0637 Gunze Electronics USA vendor AVISION 0x0638 Avision vendor TEAC 0x0644 TEAC vendor ACTON 0x0647 Acton Research Corp. vendor OPTO 0x065a Optoelectronics Co., Ltd vendor SGI 0x065e Silicon Graphics vendor SANWASUPPLY 0x0663 Sanwa Supply vendor MEGATEC 0x0665 Megatec vendor LINKSYS 0x066b Linksys vendor ACERSA 0x066e Acer Semiconductor America vendor SIGMATEL 0x066f Sigmatel vendor DRAYTEK 0x0675 DrayTek vendor AIWA 0x0677 Aiwa vendor ACARD 0x0678 ACARD Technology vendor PROLIFIC 0x067b Prolific Technology vendor SIEMENS 0x067c Siemens vendor AVANCELOGIC 0x0680 Avance Logic vendor SIEMENS2 0x0681 Siemens vendor MINOLTA 0x0686 Minolta vendor CHPRODUCTS 0x068e CH Products vendor HAGIWARA 0x0693 Hagiwara Sys-Com vendor CTX 0x0698 Chuntex vendor ASKEY 0x069a Askey Computer vendor SAITEK 0x06a3 Saitek vendor ALCATELT 0x06b9 Alcatel Telecom vendor AGFA 0x06bd AGFA-Gevaert vendor ASIAMD 0x06be Asia Microelectronic Development vendor BIZLINK 0x06c4 Bizlink International vendor KEYSPAN 0x06cd Keyspan / InnoSys Inc. vendor CONTEC 0x06ce Contec products vendor AASHIMA 0x06d6 Aashima Technology vendor LIEBERT 0x06da Liebert vendor MULTITECH 0x06e0 MultiTech vendor ADS 0x06e1 ADS Technologies vendor ALCATELM 0x06e4 Alcatel Microelectronics vendor SIRIUS 0x06ea Sirius Technologies vendor GUILLEMOT 0x06f8 Guillemot vendor BOSTON 0x06fd Boston Acoustics vendor SMC 0x0707 Standard Microsystems vendor PUTERCOM 0x0708 Putercom vendor MCT 0x0711 MCT vendor IMATION 0x0718 Imation vendor TECLAST 0x071b Teclast vendor SONYERICSSON 0x0731 Sony Ericsson vendor EICON 0x0734 Eicon Networks vendor SYNTECH 0x0745 Syntech Information vendor DIGITALSTREAM 0x074e Digital Stream vendor AUREAL 0x0755 Aureal Semiconductor vendor MAUDIO 0x0763 M-Audio vendor CYBERPOWER 0x0764 Cyber Power Systems, Inc. vendor SURECOM 0x0769 Surecom Technology vendor HIDGLOBAL 0x076b HID Global vendor LINKSYS2 0x077b Linksys vendor GRIFFIN 0x077d Griffin Technology vendor SANDISK 0x0781 SanDisk vendor JENOPTIK 0x0784 Jenoptik vendor LOGITEC 0x0789 Logitec vendor NOKIA2 0x078b Nokia vendor BRIMAX 0x078e Brimax vendor AXIS 0x0792 Axis Communications vendor ABL 0x0794 ABL Electronics vendor SAGEM 0x079b Sagem vendor SUNCOMM 0x079c Sun Communications, Inc. vendor ALFADATA 0x079d Alfadata Computer vendor NATIONALTECH 0x07a2 National Technical Systems vendor ONNTO 0x07a3 Onnto vendor BE 0x07a4 Be vendor ADMTEK 0x07a6 ADMtek vendor COREGA 0x07aa Corega vendor FREECOM 0x07ab Freecom vendor MICROTECH 0x07af Microtech vendor GENERALINSTMNTS 0x07b2 General Instruments (Motorola) vendor OLYMPUS 0x07b4 Olympus vendor ABOCOM 0x07b8 AboCom Systems vendor KEISOKUGIKEN 0x07c1 Keisokugiken vendor ONSPEC 0x07c4 OnSpec vendor APG 0x07c5 APG Cash Drawer vendor BUG 0x07c8 B.U.G. vendor ALLIEDTELESYN 0x07c9 Allied Telesyn International vendor AVERMEDIA 0x07ca AVerMedia Technologies vendor SIIG 0x07cc SIIG vendor CASIO 0x07cf CASIO vendor DLINK2 0x07d1 D-Link vendor APTIO 0x07d2 Aptio Products vendor ARASAN 0x07da Arasan Chip Systems vendor ALLIEDCABLE 0x07e6 Allied Cable vendor STSN 0x07ef STSN vendor CENTURY 0x07f7 Century Corp vendor NEWLINK 0x07ff NEWlink vendor MAGTEK 0x0801 Mag-Tek vendor ZOOM 0x0803 Zoom Telephonics vendor PCS 0x0810 Personal Communication Systems vendor ALPHASMART 0x081e AlphaSmart, Inc. vendor BROADLOGIC 0x0827 BroadLogic vendor HANDSPRING 0x082d Handspring vendor PALM 0x0830 Palm Computing vendor SOURCENEXT 0x0833 SOURCENEXT vendor ACTIONSTAR 0x0835 Action Star Enterprise vendor SAMSUNG_TECHWIN 0x0839 Samsung Techwin vendor ACCTON 0x083a Accton Technology vendor DIAMOND 0x0841 Diamond vendor NETGEAR 0x0846 BayNETGEAR vendor TOPRE 0x0853 Topre Corporation vendor ACTIVEWIRE 0x0854 ActiveWire vendor BBELECTRONICS 0x0856 B&B Electronics vendor PORTGEAR 0x085a PortGear vendor NETGEAR2 0x0864 Netgear vendor SYSTEMTALKS 0x086e System Talks vendor METRICOM 0x0870 Metricom vendor ADESSOKBTEK 0x087c ADESSO/Kbtek America vendor JATON 0x087d Jaton vendor APT 0x0880 APT Technologies vendor BOCARESEARCH 0x0885 Boca Research vendor ANDREA 0x08a8 Andrea Electronics vendor BURRBROWN 0x08bb Burr-Brown Japan vendor 2WIRE 0x08c8 2Wire vendor AIPTEK 0x08ca AIPTEK International vendor SMARTBRIDGES 0x08d1 SmartBridges vendor FUJITSUSIEMENS 0x08d4 Fujitsu-Siemens vendor BILLIONTON 0x08dd Billionton Systems vendor GEMALTO 0x08e6 Gemalto SA vendor EXTENDED 0x08e9 Extended Systems vendor MSYSTEMS 0x08ec M-Systems vendor DIGIANSWER 0x08fd Digianswer vendor AUTHENTEC 0x08ff AuthenTec vendor AUDIOTECHNICA 0x0909 Audio-Technica vendor TRUMPION 0x090a Trumpion Microelectronics vendor FEIYA 0x090c Feiya vendor ALATION 0x0910 Alation Systems vendor GLOBESPAN 0x0915 Globespan vendor CONCORDCAMERA 0x0919 Concord Camera vendor GARMIN 0x091e Garmin International vendor GOHUBS 0x0921 GoHubs vendor DYMO 0x0922 DYMO vendor XEROX 0x0924 Xerox vendor BIOMETRIC 0x0929 American Biometric Company vendor TOSHIBA 0x0930 Toshiba vendor PLEXTOR 0x093b Plextor vendor INTREPIDCS 0x093c Intrepid vendor YANO 0x094f Yano vendor KINGSTON 0x0951 Kingston Technology vendor BLUEWATER 0x0956 BlueWater Systems vendor AGILENT 0x0957 Agilent Technologies vendor GUDE 0x0959 Gude ADS vendor PORTSMITH 0x095a Portsmith vendor ACERW 0x0967 Acer vendor ADIRONDACK 0x0976 Adirondack Wire & Cable vendor BECKHOFF 0x0978 Beckhoff vendor MINDSATWORK 0x097a Minds At Work vendor POINTCHIPS 0x09a6 PointChips vendor INTERSIL 0x09aa Intersil vendor ALTIUS 0x09b3 Altius Solutions vendor ARRIS 0x09c1 Arris Interactive vendor ACTIVCARD 0x09c3 ACTIVCARD vendor ACTISYS 0x09c4 ACTiSYS vendor NOVATEL2 0x09d7 Novatel Wireless vendor AFOURTECH 0x09da A-FOUR TECH vendor AIMEX 0x09dc AIMEX vendor ADDONICS 0x09df Addonics Technologies vendor AKAI 0x09e8 AKAI professional M.I. vendor ARESCOM 0x09f5 ARESCOM vendor BAY 0x09f9 Bay Associates vendor ALTERA 0x09fb Altera vendor CSR 0x0a12 Cambridge Silicon Radio vendor TREK 0x0a16 Trek Technology vendor ASAHIOPTICAL 0x0a17 Asahi Optical vendor BOCASYSTEMS 0x0a43 Boca Systems vendor SHANTOU 0x0a46 ShanTou vendor MEDIAGEAR 0x0a48 MediaGear vendor BROADCOM 0x0a5c Broadcom vendor GREENHOUSE 0x0a6b GREENHOUSE vendor MEDELI 0x0a67 Medeli vendor GEOCAST 0x0a79 Geocast Network Systems vendor EGO 0x0a92 EGO systems vendor IDQUANTIQUE 0x0aba ID Quantique vendor IDTECH 0x0acd ID TECH vendor ZYDAS 0x0ace Zydas Technology Corporation vendor NEODIO 0x0aec Neodio vendor OPTION 0x0af0 Option N.V. vendor ASUS 0x0b05 ASUSTeK Computer vendor TODOS 0x0b0c Todos Data System vendor SIIG2 0x0b39 SIIG vendor TEKRAM 0x0b3b Tekram Technology vendor HAL 0x0b41 HAL Corporation vendor EMS 0x0b43 EMS Production vendor NEC2 0x0b62 NEC vendor ADLINK 0x0b63 ADLINK Technoligy, Inc. vendor ATI2 0x0b6f ATI vendor ZEEVO 0x0b7a Zeevo, Inc. vendor KURUSUGAWA 0x0b7e Kurusugawa Electronics, Inc. vendor SMART 0x0b8c Smart Technologies vendor ASIX 0x0b95 ASIX Electronics vendor O2MICRO 0x0b97 O2 Micro, Inc. vendor USR 0x0baf U.S. Robotics vendor AMBIT 0x0bb2 Ambit Microsystems vendor HTC 0x0bb4 HTC vendor REALTEK 0x0bda Realtek vendor ERICSSON2 0x0bdb Ericsson vendor MEI 0x0bed MEI vendor ADDONICS2 0x0bf6 Addonics Technology vendor FSC 0x0bf8 Fujitsu Siemens Computers vendor AGATE 0x0c08 Agate Technologies vendor DMI 0x0c0b DMI vendor CANYON 0x0c10 Canyon vendor ICOM 0x0c26 Icom Inc. vendor GNOTOMETRICS 0x0c33 GN Otometrics vendor CHICONY2 0x0c45 Chicony / Microdia / Sonix Technology Co., Ltd. vendor REINERSCT 0x0c4b Reiner-SCT vendor SEALEVEL 0x0c52 Sealevel System vendor JETI 0x0c6c Jeti vendor LUWEN 0x0c76 Luwen vendor ELEKTOR 0x0c7d ELEKTOR Electronics vendor KYOCERA2 0x0c88 Kyocera Wireless Corp. vendor ZCOM 0x0cde Z-Com vendor ATHEROS2 0x0cf3 Atheros Communications vendor POSIFLEX 0x0d3a POSIFLEX vendor TANGTOP 0x0d3d Tangtop vendor KOBIL 0x0d46 KOBIL vendor SMC3 0x0d5c Standard Microsystems vendor ADDON 0x0d7d Add-on Technology vendor ACDC 0x0d7e American Computer & Digital Components vendor CMEDIA 0x0d8c CMEDIA vendor CONCEPTRONIC 0x0d8e Conceptronic vendor SKANHEX 0x0d96 Skanhex Technology, Inc. vendor MSI 0x0db0 Micro Star International vendor ELCON 0x0db7 ELCON Systemtechnik vendor UNKNOWN4 0x0dcd Unknown vendor vendor NETAC 0x0dd8 Netac vendor SITECOMEU 0x0df6 Sitecom Europe vendor MOBILEACTION 0x0df7 Mobile Action vendor AMIGO 0x0e0b Amigo Technology vendor SPEEDDRAGON 0x0e55 Speed Dragon Multimedia vendor HAWKING 0x0e66 Hawking vendor FOSSIL 0x0e67 Fossil, Inc vendor GMATE 0x0e7e G.Mate, Inc vendor MEDIATEK 0x0e8d MediaTek, Inc. vendor OTI 0x0ea0 Ours Technology vendor YISO 0x0eab Yiso Wireless Co. vendor PILOTECH 0x0eaf Pilotech vendor NOVATECH 0x0eb0 NovaTech vendor ITEGNO 0x0eba iTegno vendor WINMAXGROUP 0x0ed1 WinMaxGroup vendor TOD 0x0ede TOD vendor EGALAX 0x0eef eGalax, Inc. vendor AIRPRIME 0x0f3d AirPrime, Inc. vendor MICROTUNE 0x0f4d Microtune vendor VTECH 0x0f88 VTech vendor FALCOM 0x0f94 Falcom Wireless Communications GmbH vendor RIM 0x0fca Research In Motion vendor DYNASTREAM 0x0fcf Dynastream Innovations vendor LARSENBRUSGAARD 0x0fd8 Larsen and Brusgaard vendor OWL 0x0fde OWL vendor KONTRON 0x0fe6 Kontron AG vendor QUALCOMM 0x1004 Qualcomm vendor APACER 0x1005 Apacer vendor MOTOROLA4 0x100d Motorola vendor HP3 0x103c Hewlett Packard vendor AIRPLUS 0x1011 Airplus vendor DESKNOTE 0x1019 Desknote vendor NEC3 0x1033 NEC vendor TTI 0x103e Thurlby Thandar Instruments vendor GIGABYTE 0x1044 GIGABYTE vendor WESTERN 0x1058 Western Digital vendor MOTOROLA 0x1063 Motorola vendor CCYU 0x1065 CCYU Technology vendor CURITEL 0x106c Curitel Communications Inc vendor SILABS2 0x10a6 SILABS2 vendor USI 0x10ab USI vendor LIEBERT2 0x10af Liebert vendor PLX 0x10b5 PLX vendor ASANTE 0x10bd Asante vendor SILABS 0x10c4 Silicon Labs vendor SILABS3 0x10c5 Silicon Labs vendor SILABS4 0x10ce Silicon Labs vendor ACTIONS 0x10d6 Actions vendor ANALOG 0x1110 Analog Devices vendor TENX 0x1130 Ten X Technology, Inc. vendor ISSC 0x1131 Integrated System Solution Corp. vendor JRC 0x1145 Japan Radio Company vendor SPHAIRON 0x114b Sphairon Access Systems GmbH vendor DELORME 0x1163 DeLorme vendor SERVERWORKS 0x1166 ServerWorks vendor DLINK3 0x1186 Dlink vendor ACERCM 0x1189 Acer Communications & Multimedia vendor SIERRA 0x1199 Sierra Wireless vendor SANWA 0x11ad Sanwa Electric Instrument Co., Ltd. vendor TOPFIELD 0x11db Topfield Co., Ltd vendor SIEMENS3 0x11f5 Siemens vendor NETINDEX 0x11f6 NetIndex vendor ALCATEL 0x11f7 Alcatel vendor INTERBIOMETRICS 0x1209 Interbiometrics vendor UNKNOWN3 0x1233 Unknown vendor vendor TSUNAMI 0x1241 Tsunami vendor PHEENET 0x124a Pheenet vendor TARGUS 0x1267 Targus vendor TWINMOS 0x126f TwinMOS vendor TENDA 0x1286 Tenda vendor TESTO 0x128d Testo products vendor CREATIVE2 0x1292 Creative Labs vendor BELKIN2 0x1293 Belkin Components vendor CYBERTAN 0x129b CyberTAN Technology vendor HUAWEI 0x12d1 Huawei Technologies vendor ARANEUS 0x12d8 Araneus Information Systems vendor TAPWAVE 0x12ef Tapwave vendor AINCOMM 0x12fd Aincomm vendor MOBILITY 0x1342 Mobility vendor DICKSMITH 0x1371 Dick Smith Electronics vendor NETGEAR3 0x1385 Netgear vendor BALTECH 0x13ad Baltech vendor CISCOLINKSYS 0x13b1 Cisco-Linksys vendor SHARK 0x13d2 Shark vendor AZUREWAVE 0x13d3 AsureWave vendor INITIO 0x13fd Initio Corporation vendor EMTEC 0x13fe Emtec vendor NOVATEL 0x1410 Novatel Wireless vendor MERLIN 0x1416 Merlin vendor REDOCTANE 0x1430 RedOctane vendor WISTRONNEWEB 0x1435 Wistron NeWeb vendor RADIOSHACK 0x1453 Radio Shack vendor FIC 0x1457 FIC / OpenMoko vendor HUAWEI3COM 0x1472 Huawei-3Com vendor ABOCOM2 0x1482 AboCom Systems vendor SILICOM 0x1485 Silicom vendor RALINK 0x148f Ralink Technology vendor IMAGINATION 0x149a Imagination Technologies vendor ATP 0x14af ATP Electronics vendor CONCEPTRONIC2 0x14b2 Conceptronic vendor SUPERTOP 0x14cd Super Top vendor PLANEX3 0x14ea Planex Communications vendor SILICONPORTALS 0x1527 Silicon Portals vendor UBIQUAM 0x1529 UBIQUAM Co., Ltd. vendor JMICRON 0x152d JMicron vendor UBLOX 0x1546 U-blox vendor PNY 0x154b PNY vendor OWEN 0x1555 Owen vendor OQO 0x1557 OQO vendor UMEDIA 0x157e U-MEDIA Communications vendor FIBERLINE 0x1582 Fiberline vendor FREESCALE 0x15a2 Freescale Semiconductor, Inc. vendor AFATECH 0x15a4 Afatech Technologies, Inc. vendor SPARKLAN 0x15a9 SparkLAN vendor OLIMEX 0x15ba Olimex vendor SOUNDGRAPH 0x15c2 Soundgraph, Inc. vendor AMIT2 0x15c5 AMIT vendor TEXTECH 0x15ca Textech International Ltd. vendor SOHOWARE 0x15e8 SOHOware vendor UMAX 0x1606 UMAX Data Systems vendor INSIDEOUT 0x1608 Inside Out Networks vendor AMOI 0x1614 Amoi Electronics vendor GOODWAY 0x1631 Good Way Technology vendor ENTREGA 0x1645 Entrega vendor ACTIONTEC 0x1668 Actiontec Electronics vendor CLIPSAL 0x166a Clipsal vendor CISCOLINKSYS2 0x167b Cisco-Linksys vendor ATHEROS 0x168c Atheros Communications vendor GIGASET 0x1690 Gigaset vendor GLOBALSUN 0x16ab Global Sun Technology vendor ANYDATA 0x16d5 AnyDATA Corporation vendor JABLOTRON 0x16d6 Jablotron vendor CMOTECH 0x16d8 C-motech vendor WIENERPLEINBAUS 0x16dc WIENER Plein & Baus GmbH. vendor AXESSTEL 0x1726 Axesstel Co., Ltd. vendor LINKSYS4 0x1737 Linksys vendor SENAO 0x1740 Senao vendor ASUS2 0x1761 ASUS vendor SWEEX2 0x177f Sweex vendor METAGEEK 0x1781 MetaGeek vendor KAMSTRUP 0x17a8 Kamstrup A/S vendor DISPLAYLINK 0x17e9 DisplayLink vendor LENOVO 0x17ef Lenovo vendor WAVESENSE 0x17f4 WaveSense vendor VAISALA 0x1843 Vaisala vendor AMIT 0x18c5 AMIT vendor GOOGLE 0x18d1 Google vendor QCOM 0x18e8 Qcom vendor ELV 0x18ef ELV vendor LINKSYS3 0x1915 Linksys vendor QUALCOMMINC 0x19d2 Qualcomm, Incorporated vendor QUALCOMM3 0x19f5 Qualcomm, Inc. vendor BAYER 0x1a79 Bayer vendor WCH2 0x1a86 QinHeng Electronics vendor STELERA 0x1a8d Stelera Wireless vendor SEL 0x1adb Schweitzer Engineering Laboratories vendor CORSAIR 0x1b1c Corsair vendor MATRIXORBITAL 0x1b3d Matrix Orbital vendor OVISLINK 0x1b75 OvisLink vendor TML 0x1b91 The Mobility Lab vendor TCTMOBILE 0x1bbb TCT Mobile vendor ALTI2 0x1bc9 Alti-2 products vendor SUNPLUS 0x1bcf Sunplus Innovation Technology Inc. vendor WAGO 0x1be3 WAGO Kontakttechnik GmbH. vendor TELIT 0x1bc7 Telit vendor IONICS 0x1c0c Ionics PlugComputer vendor LONGCHEER 0x1c9e Longcheer Holdings, Ltd. vendor MPMAN 0x1cae MpMan vendor DRESDENELEKTRONIK 0x1cf1 dresden elektronik vendor NEOTEL 0x1d09 Neotel vendor DREAMLINK 0x1d34 Dream Link vendor PEGATRON 0x1d4d Pegatron vendor QISDA 0x1da5 Qisda vendor METAGEEK2 0x1dd5 MetaGeek vendor ALINK 0x1e0e Alink vendor AIRTIES 0x1eda AirTies vendor FESTO 0x1e29 Festo vendor LAKESHORE 0x1fb9 Lake Shore Cryotronics, Inc. vendor VERTEX 0x1fe7 Vertex Wireless Co., Ltd. vendor DLINK 0x2001 D-Link vendor PLANEX2 0x2019 Planex Communications vendor HAUPPAUGE2 0x2040 Hauppauge Computer Works vendor TLAYTECH 0x20b9 Tlay Tech vendor ENCORE 0x203d Encore vendor QIHARDWARE 0x20b7 QI-hardware vendor PARA 0x20b8 PARA Industrial vendor SIMTEC 0x20df Simtec Electronics vendor TRENDNET 0x20f4 TRENDnet vendor RTSYSTEMS 0x2100 RTSYSTEMS vendor VIALABS 0x2109 VIA Labs vendor ERICSSON 0x2282 Ericsson vendor MOTOROLA2 0x22b8 Motorola vendor WETELECOM 0x22de WeTelecom vendor WESTMOUNTAIN 0x2405 West Mountain Radio vendor TRIPPLITE 0x2478 Tripp-Lite vendor HIROSE 0x2631 Hirose Electric vendor NHJ 0x2770 NHJ vendor PLANEX 0x2c02 Planex Communications vendor VIDZMEDIA 0x3275 VidzMedia Pte Ltd vendor LINKINSTRUMENTS 0x3195 Link Instruments Inc. vendor AEI 0x3334 AEI vendor HANK 0x3353 Hank Connection vendor PQI 0x3538 PQI vendor DAISY 0x3579 Daisy Technology vendor NI 0x3923 National Instruments vendor MICRONET 0x3980 Micronet Communications vendor IODATA2 0x40bb I-O Data vendor IRIVER 0x4102 iRiver vendor DELL 0x413c Dell vendor WCH 0x4348 QinHeng Electronics vendor ACEECA 0x4766 Aceeca vendor FEIXUN 0x4855 FeiXun Communication vendor PAPOUCH 0x5050 Papouch products vendor AVERATEC 0x50c2 Averatec vendor SWEEX 0x5173 Sweex vendor PROLIFIC2 0x5372 Prolific Technologies vendor ONSPEC2 0x55aa OnSpec Electronic Inc. vendor ZINWELL 0x5a57 Zinwell vendor SITECOM 0x6189 Sitecom vendor ARKMICRO 0x6547 Arkmicro Technologies Inc. vendor 3COM2 0x6891 3Com vendor EDIMAX 0x7392 Edimax vendor INTEL 0x8086 Intel vendor INTEL2 0x8087 Intel vendor ALLWIN 0x8516 ALLWIN Tech vendor SITECOM2 0x9016 Sitecom vendor MOSCHIP 0x9710 MosChip Semiconductor vendor NETGEAR4 0x9846 Netgear vendor MARVELL 0x9e88 Marvell Technology Group Ltd. vendor 3COM3 0xa727 3Com vendor CACE 0xcace CACE Technologies vendor EVOLUTION 0xdeee Evolution Robotics products vendor DATAAPEX 0xdaae DataApex vendor HP2 0xf003 Hewlett Packard vendor LOGILINK 0xfc08 LogiLink vendor USRP 0xfffe GNU Radio USRP /* * List of known products. Grouped by vendor. */ /* 3Com products */ product 3COM HOMECONN 0x009d HomeConnect Camera product 3COM 3CREB96 0x00a0 Bluetooth USB Adapter product 3COM 3C19250 0x03e8 3C19250 Ethernet Adapter product 3COM 3CRSHEW696 0x0a01 3CRSHEW696 Wireless Adapter product 3COM 3C460 0x11f8 HomeConnect 3C460 product 3COM USR56K 0x3021 U.S.Robotics 56000 Voice FaxModem Pro product 3COM 3C460B 0x4601 HomeConnect 3C460B product 3COM2 3CRUSB10075 0xa727 3CRUSB10075 product 3COM3 AR5523_1 0x6893 AR5523 product 3COM3 AR5523_2 0x6895 AR5523 product 3COM3 AR5523_3 0x6897 AR5523 product 3COMUSR OFFICECONN 0x0082 3Com OfficeConnect Analog Modem product 3COMUSR USRISDN 0x008f 3Com U.S. Robotics Pro ISDN TA product 3COMUSR HOMECONN 0x009d 3Com HomeConnect Camera product 3COMUSR USR56K 0x3021 U.S. Robotics 56000 Voice FaxModem Pro /* AboCom products */ product ABOCOM XX1 0x110c XX1 product ABOCOM XX2 0x200c XX2 product ABOCOM RT2770 0x2770 RT2770 product ABOCOM RT2870 0x2870 RT2870 product ABOCOM RT3070 0x3070 RT3070 product ABOCOM RT3071 0x3071 RT3071 product ABOCOM RT3072 0x3072 RT3072 product ABOCOM2 RT2870_1 0x3c09 RT2870 product ABOCOM URE450 0x4000 URE450 Ethernet Adapter product ABOCOM UFE1000 0x4002 UFE1000 Fast Ethernet Adapter product ABOCOM DSB650TX_PNA 0x4003 1/10/100 Ethernet Adapter product ABOCOM XX4 0x4004 XX4 product ABOCOM XX5 0x4007 XX5 product ABOCOM XX6 0x400b XX6 product ABOCOM XX7 0x400c XX7 product ABOCOM RTL8151 0x401a RTL8151 product ABOCOM XX8 0x4102 XX8 product ABOCOM XX9 0x4104 XX9 product ABOCOM UF200 0x420a UF200 Ethernet product ABOCOM WL54 0x6001 WL54 product ABOCOM XX10 0xabc1 XX10 product ABOCOM BWU613 0xb000 BWU613 product ABOCOM HWU54DM 0xb21b HWU54DM product ABOCOM RT2573_2 0xb21c RT2573 product ABOCOM RT2573_3 0xb21d RT2573 product ABOCOM RT2573_4 0xb21e RT2573 product ABOCOM RTL8188CU_1 0x8188 RTL8188CU product ABOCOM RTL8188CU_2 0x8189 RTL8188CU product ABOCOM RTL8192CU 0x8178 RTL8192CU product ABOCOM RTL8188EU 0x8179 RTL8188EU product ABOCOM WUG2700 0xb21f WUG2700 /* Acton Research Corp. */ product ACTON SPECTRAPRO 0x0100 FTDI compatible adapter /* Accton products */ product ACCTON USB320_EC 0x1046 USB320-EC Ethernet Adapter product ACCTON 2664W 0x3501 2664W product ACCTON 111 0x3503 T-Sinus 111 Wireless Adapter product ACCTON SMCWUSBG_NF 0x4505 SMCWUSB-G (no firmware) product ACCTON SMCWUSBG 0x4506 SMCWUSB-G product ACCTON SMCWUSBTG2_NF 0x4507 SMCWUSBT-G2 (no firmware) product ACCTON SMCWUSBTG2 0x4508 SMCWUSBT-G2 product ACCTON PRISM_GT 0x4521 PrismGT USB 2.0 WLAN product ACCTON SS1001 0x5046 SpeedStream Ethernet Adapter product ACCTON RT2870_2 0x6618 RT2870 product ACCTON RT3070 0x7511 RT3070 product ACCTON RT2770 0x7512 RT2770 product ACCTON RT2870_3 0x7522 RT2870 product ACCTON RT2870_5 0x8522 RT2870 product ACCTON RT3070_4 0xa512 RT3070 product ACCTON RT2870_4 0xa618 RT2870 product ACCTON RT3070_1 0xa701 RT3070 product ACCTON RT3070_2 0xa702 RT3070 product ACCTON RT2870_1 0xb522 RT2870 product ACCTON RT3070_3 0xc522 RT3070 product ACCTON RT3070_5 0xd522 RT3070 product ACCTON RTL8192SU 0xc512 RTL8192SU product ACCTON ZD1211B 0xe501 ZD1211B product ACCTON WN7512 0xf522 WN7512 /* Aceeca products */ product ACEECA MEZ1000 0x0001 MEZ1000 RDA /* Acer Communications & Multimedia (oemd by Surecom) */ product ACERCM EP1427X2 0x0893 EP-1427X-2 Ethernet Adapter /* Acer Labs products */ product ACERLABS M5632 0x5632 USB 2.0 Data Link /* Acer Peripherals, Inc. products */ product ACERP ACERSCAN_C310U 0x12a6 Acerscan C310U product ACERP ACERSCAN_320U 0x2022 Acerscan 320U product ACERP ACERSCAN_640U 0x2040 Acerscan 640U product ACERP ACERSCAN_620U 0x2060 Acerscan 620U product ACERP ACERSCAN_4300U 0x20b0 Benq 3300U/4300U product ACERP ACERSCAN_640BT 0x20be Acerscan 640BT product ACERP ACERSCAN_1240U 0x20c0 Acerscan 1240U product ACERP S81 0x4027 BenQ S81 phone product ACERP H10 0x4068 AWL400 Wireless Adapter product ACERP ATAPI 0x6003 ATA/ATAPI Adapter product ACERP AWL300 0x9000 AWL300 Wireless Adapter product ACERP AWL400 0x9001 AWL400 Wireless Adapter /* Acer Warp products */ product ACERW WARPLINK 0x0204 Warplink /* Actions products */ product ACTIONS MP4 0x1101 Actions MP4 Player /* Actiontec, Inc. products */ product ACTIONTEC PRISM_25 0x0408 Prism2.5 Wireless Adapter product ACTIONTEC PRISM_25A 0x0421 Prism2.5 Wireless Adapter A product ACTIONTEC FREELAN 0x6106 ROPEX FreeLan 802.11b product ACTIONTEC UAT1 0x7605 UAT1 Wireless Ethernet Adapter /* ACTiSYS products */ product ACTISYS IR2000U 0x0011 ACT-IR2000U FIR /* ActiveWire, Inc. products */ product ACTIVEWIRE IOBOARD 0x0100 I/O Board product ACTIVEWIRE IOBOARD_FW1 0x0101 I/O Board, rev. 1 firmware /* Adaptec products */ product ADAPTEC AWN8020 0x0020 AWN-8020 WLAN /* Addtron products */ product ADDTRON AWU120 0xff31 AWU-120 /* ADLINK Texhnology products */ product ADLINK ND6530 0x6530 ND-6530 USB-Serial /* ADMtek products */ product ADMTEK PEGASUSII_4 0x07c2 AN986A Ethernet product ADMTEK PEGASUS 0x0986 AN986 Ethernet product ADMTEK PEGASUSII 0x8511 AN8511 Ethernet product ADMTEK PEGASUSII_2 0x8513 AN8513 Ethernet product ADMTEK PEGASUSII_3 0x8515 AN8515 Ethernet /* ADDON products */ /* PNY OEMs these */ product ADDON ATTACHE 0x1300 USB 2.0 Flash Drive product ADDON ATTACHE 0x1300 USB 2.0 Flash Drive product ADDON A256MB 0x1400 Attache 256MB USB 2.0 Flash Drive product ADDON DISKPRO512 0x1420 USB 2.0 Flash Drive (DANE-ELEC zMate 512MB USB flash drive) /* Addonics products */ product ADDONICS2 CABLE_205 0xa001 Cable 205 /* ADS products */ product ADS UBS10BT 0x0008 UBS-10BT Ethernet product ADS UBS10BTX 0x0009 UBS-10BT Ethernet /* AEI products */ product AEI FASTETHERNET 0x1701 Fast Ethernet /* Afatech Technologies, Inc. */ product AFATECH AFATECH1336 0x1336 Flash Card Reader /* Agate Technologies products */ product AGATE QDRIVE 0x0378 Q-Drive /* AGFA products */ product AGFA SNAPSCAN1212U 0x0001 SnapScan 1212U product AGFA SNAPSCAN1236U 0x0002 SnapScan 1236U product AGFA SNAPSCANTOUCH 0x0100 SnapScan Touch product AGFA SNAPSCAN1212U2 0x2061 SnapScan 1212U product AGFA SNAPSCANE40 0x208d SnapScan e40 product AGFA SNAPSCANE50 0x208f SnapScan e50 product AGFA SNAPSCANE20 0x2091 SnapScan e20 product AGFA SNAPSCANE25 0x2095 SnapScan e25 product AGFA SNAPSCANE26 0x2097 SnapScan e26 product AGFA SNAPSCANE52 0x20fd SnapScan e52 /* Ain Communication Technology products */ product AINCOMM AWU2000B 0x1001 AWU2000B Wireless Adapter /* AIPTEK products */ product AIPTEK POCKETCAM3M 0x2011 PocketCAM 3Mega product AIPTEK2 PENCAM_MEGA_1_3 0x504a PenCam Mega 1.3 product AIPTEK2 SUNPLUS_TECH 0x0c15 Sunplus Technology Inc. /* AirPlis products */ product AIRPLUS MCD650 0x3198 MCD650 modem /* AirPrime products */ product AIRPRIME PC5220 0x0112 CDMA Wireless PC Card product AIRPRIME USB308 0x68A3 USB308 HSPA+ USB Modem product AIRPRIME AC313U 0x68aa Sierra Wireless AirCard 313U /* AirTies products */ product AIRTIES RT3070 0x2310 RT3070 /* AKS products */ product AKS USBHASP 0x0001 USB-HASP 0.06 /* Alcatel products */ product ALCATEL OT535 0x02df One Touch 535/735 /* Alcor Micro, Inc. products */ product ALCOR2 KBD_HUB 0x2802 Kbd Hub product ALCOR DUMMY 0x0000 Dummy product product ALCOR SDCR_6335 0x6335 SD/MMC Card Reader product ALCOR SDCR_6362 0x6362 SD/MMC Card Reader product ALCOR SDCR_6366 0x6366 SD/MMC Card Reader product ALCOR TRANSCEND 0x6387 Transcend JetFlash Drive product ALCOR MA_KBD_HUB 0x9213 MacAlly Kbd Hub product ALCOR AU9814 0x9215 AU9814 Hub product ALCOR UMCR_9361 0x9361 USB Multimedia Card Reader product ALCOR SM_KBD 0x9410 MicroConnectors/StrongMan Keyboard product ALCOR NEC_KBD_HUB 0x9472 NEC Kbd Hub product ALCOR AU9720 0x9720 USB2 - RS-232 product ALCOR AU6390 0x6390 AU6390 USB-IDE converter /* Alink products */ product ALINK DWM652U5 0xce16 DWM-652 product ALINK 3G 0x9000 3G modem product ALINK 3GU 0x9200 3G modem /* Altec Lansing products */ product ALTEC ADA70 0x0070 ADA70 Speakers product ALTEC ASC495 0xff05 ASC495 Speakers /* Alti-2 products */ product ALTI2 N3 0x6001 FTDI compatible adapter /* Allied Telesyn International products */ product ALLIEDTELESYN ATUSB100 0xb100 AT-USB100 /* ALLWIN Tech products */ product ALLWIN RT2070 0x2070 RT2070 product ALLWIN RT2770 0x2770 RT2770 product ALLWIN RT2870 0x2870 RT2870 product ALLWIN RT3070 0x3070 RT3070 product ALLWIN RT3071 0x3071 RT3071 product ALLWIN RT3072 0x3072 RT3072 product ALLWIN RT3572 0x3572 RT3572 /* AlphaSmart, Inc. products */ product ALPHASMART DANA_KB 0xdbac AlphaSmart Dana Keyboard product ALPHASMART DANA_SYNC 0xdf00 AlphaSmart Dana HotSync /* Amoi products */ product AMOI H01 0x0800 H01 3G modem product AMOI H01A 0x7002 H01A 3G modem product AMOI H02 0x0802 H02 3G modem /* American Power Conversion products */ product APC UPS 0x0002 Uninterruptible Power Supply /* Ambit Microsystems products */ product AMBIT WLAN 0x0302 WLAN product AMBIT NTL_250 0x6098 NTL 250 cable modem /* Apacer products */ product APACER HT202 0xb113 USB 2.0 Flash Drive /* American Power Conversion products */ product APC UPS 0x0002 Uninterruptible Power Supply /* Amigo Technology products */ product AMIGO RT2870_1 0x9031 RT2870 product AMIGO RT2870_2 0x9041 RT2870 /* AMIT products */ product AMIT CGWLUSB2GO 0x0002 CG-WLUSB2GO product AMIT CGWLUSB2GNR 0x0008 CG-WLUSB2GNR product AMIT RT2870_1 0x0012 RT2870 /* AMIT(2) products */ product AMIT2 RT2870 0x0008 RT2870 /* Analog Devices products */ product ANALOGDEVICES GNICE 0xf000 FTDI compatible adapter product ANALOGDEVICES GNICEPLUS 0xf001 FTDI compatible adapter /* Anchor products */ product ANCHOR SERIAL 0x2008 Serial product ANCHOR EZUSB 0x2131 EZUSB product ANCHOR EZLINK 0x2720 EZLINK /* AnyData products */ product ANYDATA ADU_620UW 0x6202 CDMA 2000 EV-DO USB Modem product ANYDATA ADU_E100X 0x6501 CDMA 2000 1xRTT/EV-DO USB Modem product ANYDATA ADU_500A 0x6502 CDMA 2000 EV-DO USB Modem /* AOX, Inc. products */ product AOX USB101 0x0008 Ethernet /* American Power Conversion products */ product APC UPS 0x0002 Uninterruptible Power Supply /* Apple Computer products */ product APPLE DUMMY 0x0000 Dummy product product APPLE IMAC_KBD 0x0201 USB iMac Keyboard product APPLE KBD 0x0202 USB Keyboard M2452 product APPLE EXT_KBD 0x020c Apple Extended USB Keyboard /* MacbookAir, aka wellspring */ product APPLE WELLSPRING_ANSI 0x0223 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING_ISO 0x0224 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING_JIS 0x0225 Apple Internal Keyboard/Trackpad /* MacbookProPenryn, aka wellspring2 */ product APPLE WELLSPRING2_ANSI 0x0230 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING2_ISO 0x0231 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING2_JIS 0x0232 Apple Internal Keyboard/Trackpad /* Macbook5,1 (unibody), aka wellspring3 */ product APPLE WELLSPRING3_ANSI 0x0236 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING3_ISO 0x0237 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING3_JIS 0x0238 Apple Internal Keyboard/Trackpad /* MacbookAir3,2 (unibody), aka wellspring4 */ product APPLE WELLSPRING4_ANSI 0x023f Apple Internal Keyboard/Trackpad product APPLE WELLSPRING4_ISO 0x0240 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING4_JIS 0x0241 Apple Internal Keyboard/Trackpad /* MacbookAir3,1 (unibody), aka wellspring4 */ product APPLE WELLSPRING4A_ANSI 0x0242 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING4A_ISO 0x0243 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING4A_JIS 0x0244 Apple Internal Keyboard/Trackpad /* Macbook8 (unibody, March 2011) */ product APPLE WELLSPRING5_ANSI 0x0245 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING5_ISO 0x0246 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING5_JIS 0x0247 Apple Internal Keyboard/Trackpad /* MacbookAir4,1 (unibody, July 2011) */ product APPLE WELLSPRING6A_ANSI 0x0249 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING6A_ISO 0x024a Apple Internal Keyboard/Trackpad product APPLE WELLSPRING6A_JIS 0x024b Apple Internal Keyboard/Trackpad /* MacbookAir4,2 (unibody, July 2011) */ product APPLE WELLSPRING6_ANSI 0x024c Apple Internal Keyboard/Trackpad product APPLE WELLSPRING6_ISO 0x024d Apple Internal Keyboard/Trackpad product APPLE WELLSPRING6_JIS 0x024e Apple Internal Keyboard/Trackpad /* Macbook8,2 (unibody) */ product APPLE WELLSPRING5A_ANSI 0x0252 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING5A_ISO 0x0253 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING5A_JIS 0x0254 Apple Internal Keyboard/Trackpad /* MacbookPro10,1 (unibody, June 2012) */ product APPLE WELLSPRING7_ANSI 0x0262 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING7_ISO 0x0263 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING7_JIS 0x0264 Apple Internal Keyboard/Trackpad /* MacbookPro10,2 (unibody, October 2012) */ product APPLE WELLSPRING7A_ANSI 0x0259 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING7A_ISO 0x025a Apple Internal Keyboard/Trackpad product APPLE WELLSPRING7A_JIS 0x025b Apple Internal Keyboard/Trackpad /* MacbookAir6,2 (unibody, June 2013) */ product APPLE WELLSPRING8_ANSI 0x0290 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING8_ISO 0x0291 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING8_JIS 0x0292 Apple Internal Keyboard/Trackpad /* MacbookPro12,1 */ product APPLE WELLSPRING9_ANSI 0x0272 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING9_ISO 0x0273 Apple Internal Keyboard/Trackpad product APPLE WELLSPRING9_JIS 0x0274 Apple Internal Keyboard/Trackpad product APPLE MOUSE 0x0301 Mouse M4848 product APPLE OPTMOUSE 0x0302 Optical mouse product APPLE MIGHTYMOUSE 0x0304 Mighty Mouse product APPLE KBD_HUB 0x1001 Hub in Apple USB Keyboard product APPLE EXT_KBD_HUB 0x1003 Hub in Apple Extended USB Keyboard product APPLE SPEAKERS 0x1101 Speakers product APPLE IPOD 0x1201 iPod product APPLE IPOD2G 0x1202 iPod 2G product APPLE IPOD3G 0x1203 iPod 3G product APPLE IPOD_04 0x1204 iPod '04' product APPLE IPODMINI 0x1205 iPod Mini product APPLE IPOD_06 0x1206 iPod '06' product APPLE IPOD_07 0x1207 iPod '07' product APPLE IPOD_08 0x1208 iPod '08' product APPLE IPODVIDEO 0x1209 iPod Video product APPLE IPODNANO 0x120a iPod Nano product APPLE IPHONE 0x1290 iPhone product APPLE IPOD_TOUCH 0x1291 iPod Touch product APPLE IPHONE_3G 0x1292 iPhone 3G product APPLE IPHONE_3GS 0x1294 iPhone 3GS product APPLE IPHONE_4 0x1297 iPhone 4 product APPLE IPHONE_4S 0x12a0 iPhone 4S product APPLE IPHONE_5 0x12a8 iPhone 5 product APPLE IPAD 0x129a iPad product APPLE ETHERNET 0x1402 Ethernet A1277 /* Arkmicro Technologies */ product ARKMICRO ARK3116 0x0232 ARK3116 Serial /* Asahi Optical products */ product ASAHIOPTICAL OPTIO230 0x0004 Digital camera product ASAHIOPTICAL OPTIO330 0x0006 Digital camera /* Asante products */ product ASANTE EA 0x1427 Ethernet /* ASIX Electronics products */ product ASIX AX88172 0x1720 10/100 Ethernet product ASIX AX88178 0x1780 AX88178 product ASIX AX88178A 0x178a AX88178A USB 2.0 10/100/1000 Ethernet product ASIX AX88179 0x1790 AX88179 USB 3.0 10/100/1000 Ethernet product ASIX AX88772 0x7720 AX88772 product ASIX AX88772A 0x772a AX88772A USB 2.0 10/100 Ethernet product ASIX AX88772B 0x772b AX88772B USB 2.0 10/100 Ethernet product ASIX AX88772B_1 0x7e2b AX88772B USB 2.0 10/100 Ethernet /* ASUS products */ product ASUS2 USBN11 0x0b05 USB-N11 product ASUS RT2570 0x1706 RT2500USB Wireless Adapter product ASUS WL167G 0x1707 WL-167g Wireless Adapter product ASUS WL159G 0x170c WL-159g product ASUS A9T_WIFI 0x171b A9T wireless product ASUS P5B_WIFI 0x171d P5B wireless product ASUS RT2573_1 0x1723 RT2573 product ASUS RT2573_2 0x1724 RT2573 product ASUS LCM 0x1726 LCM display product ASUS RT2870_1 0x1731 RT2870 product ASUS RT2870_2 0x1732 RT2870 product ASUS RT2870_3 0x1742 RT2870 product ASUS RT2870_4 0x1760 RT2870 product ASUS RT2870_5 0x1761 RT2870 product ASUS USBN13 0x1784 USB-N13 product ASUS USBN10 0x1786 USB-N10 product ASUS RT3070_1 0x1790 RT3070 product ASUS RTL8192SU 0x1791 RTL8192SU product ASUS USB_N53 0x179d ASUS Black Diamond Dual Band USB-N53 product ASUS RTL8192CU 0x17ab RTL8192CU product ASUS USBN66 0x17ad USB-N66 product ASUS USBN10NANO 0x17ba USB-N10 Nano product ASUS USBAC51 0x17d1 USB-AC51 product ASUS A730W 0x4202 ASUS MyPal A730W product ASUS P535 0x420f ASUS P535 PDA product ASUS GMSC 0x422f ASUS Generic Mass Storage /* ATen products */ product ATEN UC1284 0x2001 Parallel printer product ATEN UC10T 0x2002 10Mbps Ethernet product ATEN UC110T 0x2007 UC-110T Ethernet product ATEN UC232A 0x2008 Serial product ATEN UC210T 0x2009 UC-210T Ethernet product ATEN DSB650C 0x4000 DSB-650C /* ATP Electronics products */ product ATP EUSB 0xaf01 ATP IG eUSB SSD /* Atheros Communications products */ product ATHEROS AR5523 0x0001 AR5523 product ATHEROS AR5523_NF 0x0002 AR5523 (no firmware) product ATHEROS2 AR5523_1 0x0001 AR5523 product ATHEROS2 AR5523_1_NF 0x0002 AR5523 (no firmware) product ATHEROS2 AR5523_2 0x0003 AR5523 product ATHEROS2 AR5523_2_NF 0x0004 AR5523 (no firmware) product ATHEROS2 AR5523_3 0x0005 AR5523 product ATHEROS2 AR5523_3_NF 0x0006 AR5523 (no firmware) product ATHEROS2 TG121N 0x1001 TG121N product ATHEROS2 WN821NV2 0x1002 WN821NV2 product ATHEROS2 3CRUSBN275 0x1010 3CRUSBN275 product ATHEROS2 WN612 0x1011 WN612 product ATHEROS2 AR9170 0x9170 AR9170 /* Atmel Comp. products */ product ATMEL STK541 0x2109 Zigbee Controller product ATMEL UHB124 0x3301 UHB124 hub product ATMEL DWL120 0x7603 DWL-120 Wireless Adapter product ATMEL BW002 0x7605 BW002 Wireless Adapter product ATMEL WL1130USB 0x7613 WL-1130 USB product ATMEL AT76C505A 0x7614 AT76c505a Wireless Adapter /* AuthenTec products */ product AUTHENTEC AES1610 0x1600 AES1610 Fingerprint Sensor /* Avision products */ product AVISION 1200U 0x0268 1200U scanner /* AVM products */ product AVM FRITZWLAN 0x8401 FRITZ!WLAN N /* Axesstel products */ product AXESSTEL DATAMODEM 0x1000 Data Modem /* AsureWave products */ product AZUREWAVE RT2870_1 0x3247 RT2870 product AZUREWAVE RT2870_2 0x3262 RT2870 product AZUREWAVE RT3070_1 0x3273 RT3070 product AZUREWAVE RT3070_2 0x3284 RT3070 product AZUREWAVE RT3070_3 0x3305 RT3070 product AZUREWAVE RTL8188CU 0x3357 RTL8188CU product AZUREWAVE RTL8188CE_1 0x3358 RTL8188CE product AZUREWAVE RTL8188CE_2 0x3359 RTL8188CE product AZUREWAVE RTL8192SU_1 0x3306 RTL8192SU product AZUREWAVE RTL8192SU_2 0x3309 RTL8192SU product AZUREWAVE RTL8192SU_3 0x3310 RTL8192SU product AZUREWAVE RTL8192SU_4 0x3311 RTL8192SU product AZUREWAVE RTL8192SU_5 0x3325 RTL8192SU /* Baltech products */ product BALTECH CARDREADER 0x9999 Card reader /* Bayer products */ product BAYER CONTOUR_CABLE 0x6001 FTDI compatible adapter /* B&B Electronics products */ product BBELECTRONICS USOTL4 0xAC01 RS-422/485 product BBELECTRONICS 232USB9M 0xac27 FTDI compatible adapter product BBELECTRONICS 485USB9F_2W 0xac25 FTDI compatible adapter product BBELECTRONICS 485USB9F_4W 0xac26 FTDI compatible adapter product BBELECTRONICS 485USBTB_2W 0xac33 FTDI compatible adapter product BBELECTRONICS 485USBTB_4W 0xac34 FTDI compatible adapter product BBELECTRONICS TTL3USB9M 0xac50 FTDI compatible adapter product BBELECTRONICS TTL5USB9M 0xac49 FTDI compatible adapter product BBELECTRONICS USO9ML2 0xac03 FTDI compatible adapter product BBELECTRONICS USO9ML2DR 0xac17 FTDI compatible adapter product BBELECTRONICS USO9ML2DR_2 0xac16 FTDI compatible adapter product BBELECTRONICS USOPTL4 0xac11 FTDI compatible adapter product BBELECTRONICS USOPTL4DR 0xac19 FTDI compatible adapter product BBELECTRONICS USOPTL4DR2 0xac18 FTDI compatible adapter product BBELECTRONICS USPTL4 0xac12 FTDI compatible adapter product BBELECTRONICS USTL4 0xac02 FTDI compatible adapter product BBELECTRONICS ZZ_PROG1_USB 0xba02 FTDI compatible adapter /* Belkin products */ /*product BELKIN F5U111 0x???? F5U111 Ethernet*/ product BELKIN F5D6050 0x0050 F5D6050 802.11b Wireless Adapter product BELKIN FBT001V 0x0081 FBT001v2 Bluetooth product BELKIN FBT003V 0x0084 FBT003v2 Bluetooth product BELKIN F5U103 0x0103 F5U103 Serial product BELKIN F5U109 0x0109 F5U109 Serial product BELKIN USB2SCSI 0x0115 USB to SCSI product BELKIN F8T012 0x0121 F8T012xx1 Bluetooth USB Adapter product BELKIN USB2LAN 0x0121 USB to LAN product BELKIN F5U208 0x0208 F5U208 VideoBus II product BELKIN F5U237 0x0237 F5U237 USB 2.0 7-Port Hub product BELKIN F5U257 0x0257 F5U257 Serial product BELKIN F5U409 0x0409 F5U409 Serial product BELKIN F6C550AVR 0x0551 F6C550-AVR UPS product BELKIN F5U120 0x1203 F5U120-PC Hub product BELKIN RTL8188CU 0x1102 RTL8188CU Wireless Adapter product BELKIN F9L1103 0x1103 F9L1103 Wireless Adapter product BELKIN RTL8192CU 0x2102 RTL8192CU Wireless Adapter product BELKIN F7D2102 0x2103 F7D2102 Wireless Adapter product BELKIN ZD1211B 0x4050 ZD1211B product BELKIN F5D5055 0x5055 F5D5055 product BELKIN F5D7050 0x7050 F5D7050 Wireless Adapter product BELKIN F5D7051 0x7051 F5D7051 54g USB Network Adapter product BELKIN F5D7050A 0x705a F5D7050A Wireless Adapter /* Also sold as 'Ativa 802.11g wireless card' */ product BELKIN F5D7050_V4000 0x705c F5D7050 v4000 Wireless Adapter product BELKIN F5D7050E 0x705e F5D7050E Wireless Adapter product BELKIN RT2870_1 0x8053 RT2870 product BELKIN RT2870_2 0x805c RT2870 product BELKIN F5D8053V3 0x815c F5D8053 v3 product BELKIN RTL8192SU_1 0x815f RTL8192SU product BELKIN RTL8192SU_2 0x845a RTL8192SU product BELKIN RTL8192SU_3 0x945a RTL8192SU product BELKIN F5D8055 0x825a F5D8055 product BELKIN F5D8055V2 0x825b F5D8055 v2 product BELKIN F5D9050V3 0x905b F5D9050 ver 3 Wireless Adapter product BELKIN2 F5U002 0x0002 F5U002 Parallel printer product BELKIN F6D4050V1 0x935a F6D4050 v1 product BELKIN F6D4050V2 0x935b F6D4050 v2 /* Billionton products */ product BILLIONTON USB100 0x0986 USB100N 10/100 FastEthernet product BILLIONTON USBLP100 0x0987 USB100LP product BILLIONTON USBEL100 0x0988 USB100EL product BILLIONTON USBE100 0x8511 USBE100 product BILLIONTON USB2AR 0x90ff USB2AR Ethernet /* Broadcom products */ product BROADCOM BCM2033 0x2033 BCM2033 Bluetooth USB dongle /* Brother Industries products */ product BROTHER HL1050 0x0002 HL-1050 laser printer product BROTHER MFC8600_9650 0x0100 MFC8600/9650 multifunction device /* Behavior Technology Computer products */ product BTC BTC6100 0x5550 6100C Keyboard product BTC BTC7932 0x6782 Keyboard with mouse port /* CACE Technologies products */ product CACE AIRPCAPNX 0x0300 AirPcap NX /* Canon, Inc. products */ product CANON N656U 0x2206 CanoScan N656U product CANON N1220U 0x2207 CanoScan N1220U product CANON D660U 0x2208 CanoScan D660U product CANON N676U 0x220d CanoScan N676U product CANON N1240U 0x220e CanoScan N1240U product CANON LIDE25 0x2220 CanoScan LIDE 25 product CANON S10 0x3041 PowerShot S10 product CANON S100 0x3045 PowerShot S100 product CANON S200 0x3065 PowerShot S200 product CANON REBELXT 0x30ef Digital Rebel XT /* CATC products */ product CATC NETMATE 0x000a Netmate Ethernet product CATC NETMATE2 0x000c Netmate2 Ethernet product CATC CHIEF 0x000d USB Chief Bus & Protocol Analyzer product CATC ANDROMEDA 0x1237 Andromeda hub /* CASIO products */ product CASIO QV_DIGICAM 0x1001 QV DigiCam product CASIO EXS880 0x1105 Exilim EX-S880 product CASIO BE300 0x2002 BE-300 PDA product CASIO NAMELAND 0x4001 CASIO Nameland EZ-USB /* CCYU products */ product CCYU ED1064 0x2136 EasyDisk ED1064 /* Century products */ product CENTURY EX35QUAT 0x011e Century USB Disk Enclosure product CENTURY EX35SW4_SB4 0x011f Century USB Disk Enclosure /* Cherry products */ product CHERRY MY3000KBD 0x0001 My3000 keyboard product CHERRY MY3000HUB 0x0003 My3000 hub product CHERRY CYBOARD 0x0004 CyBoard Keyboard /* Chic Technology products */ product CHIC MOUSE1 0x0001 mouse product CHIC CYPRESS 0x0003 Cypress USB Mouse /* Chicony products */ product CHICONY KB8933 0x0001 KB-8933 keyboard product CHICONY KU0325 0x0116 KU-0325 keyboard product CHICONY CNF7129 0xb071 Notebook Web Camera product CHICONY HDUVCCAM 0xb40a HD UVC WebCam product CHICONY RTL8188CUS_1 0xaff7 RTL8188CUS product CHICONY RTL8188CUS_2 0xaff8 RTL8188CUS product CHICONY RTL8188CUS_3 0xaff9 RTL8188CUS product CHICONY RTL8188CUS_4 0xaffa RTL8188CUS product CHICONY RTL8188CUS_5 0xaffa RTL8188CUS product CHICONY2 TWINKLECAM 0x600d TwinkleCam USB camera /* CH Products */ product CHPRODUCTS PROTHROTTLE 0x00f1 Pro Throttle product CHPRODUCTS PROPEDALS 0x00f2 Pro Pedals product CHPRODUCTS FIGHTERSTICK 0x00f3 Fighterstick product CHPRODUCTS FLIGHTYOKE 0x00ff Flight Sim Yoke /* Cisco-Linksys products */ product CISCOLINKSYS WUSB54AG 0x000c WUSB54AG Wireless Adapter product CISCOLINKSYS WUSB54G 0x000d WUSB54G Wireless Adapter product CISCOLINKSYS WUSB54GP 0x0011 WUSB54GP Wireless Adapter product CISCOLINKSYS USB200MV2 0x0018 USB200M v2 product CISCOLINKSYS HU200TS 0x001a HU200TS Wireless Adapter product CISCOLINKSYS WUSB54GC 0x0020 WUSB54GC product CISCOLINKSYS WUSB54GR 0x0023 WUSB54GR product CISCOLINKSYS WUSBF54G 0x0024 WUSBF54G product CISCOLINKSYS AE1000 0x002f AE1000 product CISCOLINKSYS USB3GIGV1 0x0041 USB3GIGV1 USB Ethernet Adapter product CISCOLINKSYS2 RT3070 0x4001 RT3070 product CISCOLINKSYS3 RT3070 0x0101 RT3070 /* Clipsal products */ product CLIPSAL 560884 0x0101 560884 C-Bus Audio Matrix Switch product CLIPSAL 5500PACA 0x0201 5500PACA C-Bus Pascal Automation Controller product CLIPSAL 5800PC 0x0301 5800PC C-Bus Wireless Interface product CLIPSAL 5500PCU 0x0303 5500PCU C-Bus Interface product CLIPSAL 5000CT2 0x0304 5000CT2 C-Bus Touch Screen product CLIPSAL C5000CT2 0x0305 C5000CT2 C-Bus Touch Screen product CLIPSAL L51xx 0x0401 L51xx C-Bus Dimmer /* CMOTECH products */ product CMOTECH CNU510 0x5141 CDMA Technologies USB modem product CMOTECH CNU550 0x5543 CDMA 2000 1xRTT/1xEVDO USB modem product CMOTECH CGU628 0x6006 CGU-628 product CMOTECH CDMA_MODEM1 0x6280 CDMA Technologies USB modem product CMOTECH DISK 0xf000 disk mode /* Compaq products */ product COMPAQ IPAQPOCKETPC 0x0003 iPAQ PocketPC product COMPAQ PJB100 0x504a Personal Jukebox PJB100 product COMPAQ IPAQLINUX 0x505a iPAQ Linux /* Composite Corp products looks the same as "TANGTOP" */ product COMPOSITE USBPS2 0x0001 USB to PS2 Adaptor /* Conceptronic products */ product CONCEPTRONIC PRISM_GT 0x3762 PrismGT USB 2.0 WLAN product CONCEPTRONIC C11U 0x7100 C11U product CONCEPTRONIC WL210 0x7110 WL-210 product CONCEPTRONIC AR5523_1 0x7801 AR5523 product CONCEPTRONIC AR5523_1_NF 0x7802 AR5523 (no firmware) product CONCEPTRONIC AR5523_2 0x7811 AR5523 product CONCEPTRONIC AR5523_2_NF 0x7812 AR5523 (no firmware) product CONCEPTRONIC2 RTL8192SU_1 0x3300 RTL8192SU product CONCEPTRONIC2 RTL8192SU_2 0x3301 RTL8192SU product CONCEPTRONIC2 RTL8192SU_3 0x3302 RTL8192SU product CONCEPTRONIC2 C54RU 0x3c02 C54RU WLAN product CONCEPTRONIC2 C54RU2 0x3c22 C54RU product CONCEPTRONIC2 RT3070_1 0x3c08 RT3070 product CONCEPTRONIC2 RT3070_2 0x3c11 RT3070 product CONCEPTRONIC2 VIGORN61 0x3c25 VIGORN61 product CONCEPTRONIC2 RT2870_1 0x3c06 RT2870 product CONCEPTRONIC2 RT2870_2 0x3c07 RT2870 product CONCEPTRONIC2 RT2870_7 0x3c09 RT2870 product CONCEPTRONIC2 RT2870_8 0x3c12 RT2870 product CONCEPTRONIC2 RT2870_3 0x3c23 RT2870 product CONCEPTRONIC2 RT2870_4 0x3c25 RT2870 product CONCEPTRONIC2 RT2870_5 0x3c27 RT2870 product CONCEPTRONIC2 RT2870_6 0x3c28 RT2870 /* Connectix products */ product CONNECTIX QUICKCAM 0x0001 QuickCam /* Conect products */ product CONTEC COM1USBH 0x8311 FTDI compatible adapter /* Corega products */ product COREGA ETHER_USB_T 0x0001 Ether USB-T product COREGA FETHER_USB_TX 0x0004 FEther USB-TX product COREGA WLAN_USB_USB_11 0x000c WirelessLAN USB-11 product COREGA FETHER_USB_TXS 0x000d FEther USB-TXS product COREGA WLANUSB 0x0012 Wireless LAN Stick-11 product COREGA FETHER_USB2_TX 0x0017 FEther USB2-TX product COREGA WLUSB_11_KEY 0x001a ULUSB-11 Key product COREGA CGUSBRS232R 0x002a CG-USBRS232R product COREGA CGWLUSB2GL 0x002d CG-WLUSB2GL product COREGA CGWLUSB2GPX 0x002e CG-WLUSB2GPX product COREGA RT2870_1 0x002f RT2870 product COREGA RT2870_2 0x003c RT2870 product COREGA RT2870_3 0x003f RT2870 product COREGA RT3070 0x0041 RT3070 product COREGA CGWLUSB300GNM 0x0042 CG-WLUSB300GNM product COREGA RTL8192SU 0x0047 RTL8192SU product COREGA RTL8192CU 0x0056 RTL8192CU product COREGA WLUSB_11_STICK 0x7613 WLAN USB Stick 11 product COREGA FETHER_USB_TXC 0x9601 FEther USB-TXC /* Corsair products */ product CORSAIR K60 0x0a60 Corsair Vengeance K60 keyboard product CORSAIR K70 0x1b09 Corsair Vengeance K70 keyboard product CORSAIR STRAFE 0x1b15 Cossair STRAFE Gaming keyboard /* Creative products */ product CREATIVE NOMAD_II 0x1002 Nomad II MP3 player product CREATIVE NOMAD_IIMG 0x4004 Nomad II MG product CREATIVE NOMAD 0x4106 Nomad product CREATIVE2 VOIP_BLASTER 0x0258 Voip Blaster product CREATIVE3 OPTICAL_MOUSE 0x0001 Notebook Optical Mouse /* Cambridge Silicon Radio Ltd. products */ product CSR BT_DONGLE 0x0001 Bluetooth USB dongle product CSR CSRDFU 0xffff USB Bluetooth Device in DFU State /* Chipsbank Microelectronics Co., Ltd */ product CHIPSBANK USBMEMSTICK 0x6025 CBM2080 Flash drive controller product CHIPSBANK USBMEMSTICK1 0x6026 CBM1180 Flash drive controller /* CTX products */ product CTX EX1300 0x9999 Ex1300 hub /* Curitel products */ product CURITEL HX550C 0x1101 CDMA 2000 1xRTT USB modem (HX-550C) product CURITEL HX57XB 0x2101 CDMA 2000 1xRTT USB modem (HX-570/575B/PR-600) product CURITEL PC5740 0x3701 Broadband Wireless modem product CURITEL UM150 0x3711 EVDO modem product CURITEL UM175 0x3714 EVDO modem /* CyberPower products */ product CYBERPOWER 1500CAVRLCD 0x0501 1500CAVRLCD /* CyberTAN Technology products */ product CYBERTAN TG54USB 0x1666 TG54USB product CYBERTAN RT2870 0x1828 RT2870 /* Cypress Semiconductor products */ product CYPRESS MOUSE 0x0001 mouse product CYPRESS THERMO 0x0002 thermometer product CYPRESS WISPY1A 0x0bad MetaGeek Wi-Spy product CYPRESS KBDHUB 0x0101 Keyboard/Hub product CYPRESS FMRADIO 0x1002 FM Radio product CYPRESS IKARILASER 0x121f Ikari Laser SteelSeries ApS product CYPRESS USBRS232 0x5500 USB-RS232 Interface product CYPRESS SLIM_HUB 0x6560 Slim Hub product CYPRESS XX6830XX 0x6830 PATA Storage Device product CYPRESS SILVERSHIELD 0xfd13 Gembird Silver Shield PM /* Daisy Technology products */ product DAISY DMC 0x6901 USB MultiMedia Reader /* Dallas Semiconductor products */ product DALLAS J6502 0x4201 J-6502 speakers /* DataApex products */ product DATAAPEX MULTICOM 0xead6 MultiCom /* Dell products */ product DELL PORT 0x0058 Port Replicator product DELL AIO926 0x5115 Photo AIO Printer 926 product DELL BC02 0x8000 BC02 Bluetooth USB Adapter product DELL PRISM_GT_1 0x8102 PrismGT USB 2.0 WLAN product DELL TM350 0x8103 TrueMobile 350 Bluetooth USB Adapter product DELL PRISM_GT_2 0x8104 PrismGT USB 2.0 WLAN product DELL U5700 0x8114 Dell 5700 3G product DELL U5500 0x8115 Dell 5500 3G product DELL U5505 0x8116 Dell 5505 3G product DELL U5700_2 0x8117 Dell 5700 3G product DELL U5510 0x8118 Dell 5510 3G product DELL U5700_3 0x8128 Dell 5700 3G product DELL U5700_4 0x8129 Dell 5700 3G product DELL U5720 0x8133 Dell 5720 3G product DELL U5720_2 0x8134 Dell 5720 3G product DELL U740 0x8135 Dell U740 CDMA product DELL U5520 0x8136 Dell 5520 3G product DELL U5520_2 0x8137 Dell 5520 3G product DELL U5520_3 0x8138 Dell 5520 3G product DELL U5730 0x8180 Dell 5730 3G product DELL U5730_2 0x8181 Dell 5730 3G product DELL U5730_3 0x8182 Dell 5730 3G product DELL DW700 0x9500 Dell DW700 GPS /* Delorme Paublishing products */ product DELORME EARTHMATE 0x0100 Earthmate GPS /* Desknote products */ product DESKNOTE UCR_61S2B 0x0c55 UCR-61S2B /* Diamond products */ product DIAMOND RIO500USB 0x0001 Rio 500 USB /* Dick Smith Electronics (really C-Net) products */ product DICKSMITH RT2573 0x9022 RT2573 product DICKSMITH CWD854F 0x9032 C-Net CWD-854 rev F /* Digi International products */ product DIGI ACCELEPORT2 0x0002 AccelePort USB 2 product DIGI ACCELEPORT4 0x0004 AccelePort USB 4 product DIGI ACCELEPORT8 0x0008 AccelePort USB 8 /* Digianswer A/S products */ product DIGIANSWER ZIGBEE802154 0x000a ZigBee/802.15.4 MAC /* D-Link products */ /*product DLINK DSBS25 0x0100 DSB-S25 serial*/ product DLINK DUBE100 0x1a00 10/100 Ethernet product DLINK DUBE100C1 0x1a02 DUB-E100 rev C1 product DLINK DSB650TX4 0x200c 10/100 Ethernet product DLINK DWL120E 0x3200 DWL-120 rev E product DLINK DWA125D1 0x330f DWA-125 rev D1 product DLINK DWA123D1 0x3310 DWA-123 rev D1 product DLINK DWL122 0x3700 DWL-122 product DLINK DWLG120 0x3701 DWL-G120 product DLINK DWL120F 0x3702 DWL-120 rev F product DLINK DWLAG132 0x3a00 DWL-AG132 product DLINK DWLAG132_NF 0x3a01 DWL-AG132 (no firmware) product DLINK DWLG132 0x3a02 DWL-G132 product DLINK DWLG132_NF 0x3a03 DWL-G132 (no firmware) product DLINK DWLAG122 0x3a04 DWL-AG122 product DLINK DWLAG122_NF 0x3a05 DWL-AG122 (no firmware) product DLINK DWLG122 0x3c00 DWL-G122 b1 Wireless Adapter product DLINK DUBE100B1 0x3c05 DUB-E100 rev B1 product DLINK RT2870 0x3c09 RT2870 product DLINK RT3072 0x3c0a RT3072 product DLINK DWA140B3 0x3c15 DWA-140 rev B3 product DLINK DWA160B2 0x3c1a DWA-160 rev B2 product DLINK DWA127 0x3c1b DWA-127 Wireless Adapter product DLINK DWA162 0x3c1f DWA-162 Wireless Adapter product DLINK DWA140D1 0x3c20 DWA-140 rev D1 product DLINK DSB650C 0x4000 10Mbps Ethernet product DLINK DSB650TX1 0x4001 10/100 Ethernet product DLINK DSB650TX 0x4002 10/100 Ethernet product DLINK DSB650TX_PNA 0x4003 1/10/100 Ethernet product DLINK DSB650TX3 0x400b 10/100 Ethernet product DLINK DSB650TX2 0x4102 10/100 Ethernet product DLINK DUB1312 0x4a00 10/100/1000 Ethernet product DLINK DSB650 0xabc1 10/100 Ethernet product DLINK DUBH7 0xf103 DUB-H7 USB 2.0 7-Port Hub product DLINK DWR510_CD 0xa805 DWR-510 CD-ROM Mode product DLINK DWR510 0x7e12 DWR-510 product DLINK DWM157 0x7d02 DWM-157 product DLINK DWM157_CD 0xa707 DWM-157 CD-ROM Mode product DLINK RTL8188CU 0x3308 RTL8188CU product DLINK RTL8192CU_1 0x3307 RTL8192CU product DLINK RTL8192CU_2 0x3309 RTL8192CU product DLINK RTL8192CU_3 0x330a RTL8192CU product DLINK DWA131B 0x330d DWA-131 rev B product DLINK2 RTL8192SU_1 0x3300 RTL8192SU product DLINK2 RTL8192SU_2 0x3302 RTL8192SU product DLINK2 DWA131A1 0x3303 DWA-131 A1 product DLINK2 DWA160A2 0x3a09 DWA-160 A2 product DLINK2 DWA120 0x3a0c DWA-120 product DLINK2 DWA120_NF 0x3a0d DWA-120 (no firmware) product DLINK2 DWA130D1 0x3a0f DWA-130 D1 product DLINK2 DWLG122C1 0x3c03 DWL-G122 c1 product DLINK2 WUA1340 0x3c04 WUA-1340 product DLINK2 DWA111 0x3c06 DWA-111 product DLINK2 RT2870_1 0x3c09 RT2870 product DLINK2 DWA110 0x3c07 DWA-110 product DLINK2 RT3072 0x3c0a RT3072 product DLINK2 RT3072_1 0x3c0b RT3072 product DLINK2 RT3070_1 0x3c0d RT3070 product DLINK2 RT3070_2 0x3c0e RT3070 product DLINK2 RT3070_3 0x3c0f RT3070 product DLINK2 DWA160A1 0x3c10 DWA-160 A1 product DLINK2 RT2870_2 0x3c11 RT2870 product DLINK2 DWA130 0x3c13 DWA-130 product DLINK2 RT3070_4 0x3c15 RT3070 product DLINK2 RT3070_5 0x3c16 RT3070 product DLINK3 DWM652 0x3e04 DWM-652 /* DisplayLink products */ product DISPLAYLINK LCD4300U 0x01ba LCD-4300U product DISPLAYLINK LCD8000U 0x01bb LCD-8000U product DISPLAYLINK LD220 0x0100 Samsung LD220 product DISPLAYLINK GUC2020 0x0059 IOGEAR DVI GUC2020 product DISPLAYLINK VCUD60 0x0136 Rextron DVI product DISPLAYLINK CONV 0x0138 StarTech CONV-USB2DVI product DISPLAYLINK DLDVI 0x0141 DisplayLink DVI product DISPLAYLINK VGA10 0x015a CMP-USBVGA10 product DISPLAYLINK WSDVI 0x0198 WS Tech DVI product DISPLAYLINK EC008 0x019b EasyCAP008 DVI product DISPLAYLINK HPDOCK 0x01d4 HP USB Docking product DISPLAYLINK NL571 0x01d7 HP USB DVI product DISPLAYLINK M01061 0x01e2 Lenovo DVI product DISPLAYLINK SWDVI 0x024c SUNWEIT DVI product DISPLAYLINK NBDOCK 0x0215 VideoHome NBdock1920 product DISPLAYLINK LUM70 0x02a9 Lilliput UM-70 product DISPLAYLINK UM7X0 0x401a nanovision MiMo product DISPLAYLINK LT1421 0x03e0 Lenovo ThinkVision LT1421 product DISPLAYLINK POLARIS2 0x0117 Polaris2 USB dock product DISPLAYLINK PLUGABLE 0x0377 Plugable docking station product DISPLAYLINK ITEC 0x02e9 i-tec USB 2.0 Docking Station /* DMI products */ product DMI CFSM_RW 0xa109 CF/SM Reader/Writer product DMI DISK 0x2bcf Generic Disk /* DrayTek products */ product DRAYTEK VIGOR550 0x0550 Vigor550 /* Dream Link products */ product DREAMLINK DL100B 0x0004 USB Webmail Notifier /* dresden elektronik products */ product DRESDENELEKTRONIK SENSORTERMINALBOARD 0x0001 SensorTerminalBoard product DRESDENELEKTRONIK WIRELESSHANDHELDTERMINAL 0x0004 Wireless Handheld Terminal product DRESDENELEKTRONIK DE_RFNODE 0x001c deRFnode product DRESDENELEKTRONIK LEVELSHIFTERSTICKLOWCOST 0x0022 Levelshifter Stick Low Cost /* DYMO */ product DYMO LABELMANAGERPNP 0x1001 DYMO LabelManager PnP /* Dynastream Innovations */ product DYNASTREAM ANTDEVBOARD 0x1003 ANT dev board product DYNASTREAM ANT2USB 0x1004 ANT2USB product DYNASTREAM ANTDEVBOARD2 0x1006 ANT dev board /* Edimax products */ product EDIMAX EW7318USG 0x7318 USB Wireless dongle product EDIMAX RTL8192SU_1 0x7611 RTL8192SU product EDIMAX RTL8192SU_2 0x7612 RTL8192SU product EDIMAX EW7622UMN 0x7622 EW-7622UMn product EDIMAX RT2870_1 0x7711 RT2870 product EDIMAX EW7717 0x7717 EW-7717 product EDIMAX EW7718 0x7718 EW-7718 product EDIMAX EW7733UND 0x7733 EW-7733UnD product EDIMAX EW7811UN 0x7811 EW-7811Un product EDIMAX RTL8192CU 0x7822 RTL8192CU /* eGalax Products */ product EGALAX TPANEL 0x0001 Touch Panel product EGALAX TPANEL2 0x0002 Touch Panel product EGALAX2 TPANEL 0x0001 Touch Panel /* EGO Products */ product EGO DUMMY 0x0000 Dummy Product product EGO M4U 0x1020 ESI M4U /* Eicon Networks */ product EICON DIVA852 0x4905 Diva 852 ISDN TA /* EIZO products */ product EIZO HUB 0x0000 hub product EIZO MONITOR 0x0001 monitor /* ELCON Systemtechnik products */ product ELCON PLAN 0x0002 Goldpfeil P-LAN /* Elecom products */ product ELECOM MOUSE29UO 0x0002 mouse 29UO product ELECOM LDUSBTX0 0x200c LD-USB/TX product ELECOM LDUSBTX1 0x4002 LD-USB/TX product ELECOM LDUSBLTX 0x4005 LD-USBL/TX product ELECOM WDC150SU2M 0x4008 WDC-150SU2M product ELECOM LDUSBTX2 0x400b LD-USB/TX product ELECOM LDUSB20 0x4010 LD-USB20 product ELECOM UCSGT 0x5003 UC-SGT product ELECOM UCSGT0 0x5004 UC-SGT product ELECOM LDUSBTX3 0xabc1 LD-USB/TX /* Elektor products */ product ELEKTOR FT323R 0x0005 FTDI compatible adapter /* Elsa products */ product ELSA MODEM1 0x2265 ELSA Modem Board product ELSA USB2ETHERNET 0x3000 Microlink USB2Ethernet /* ELV products */ product ELV USBI2C 0xe00f USB-I2C interface /* EMS products */ product EMS DUAL_SHOOTER 0x0003 PSX gun controller converter /* Emtec products */ product EMTEC RUF2PS 0x2240 Flash Drive /* Encore products */ product ENCORE RT3070_1 0x1480 RT3070 product ENCORE RT3070_2 0x14a1 RT3070 product ENCORE RT3070_3 0x14a9 RT3070 /* Entrega products */ product ENTREGA 1S 0x0001 1S serial product ENTREGA 2S 0x0002 2S serial product ENTREGA 1S25 0x0003 1S25 serial product ENTREGA 4S 0x0004 4S serial product ENTREGA E45 0x0005 E45 Ethernet product ENTREGA CENTRONICS 0x0006 Parallel Port product ENTREGA XX1 0x0008 Ethernet product ENTREGA 1S9 0x0093 1S9 serial product ENTREGA EZUSB 0x8000 EZ-USB /*product ENTREGA SERIAL 0x8001 DB25 Serial*/ product ENTREGA 2U4S 0x8004 2U4S serial/usb hub product ENTREGA XX2 0x8005 Ethernet /*product ENTREGA SERIAL_DB9 0x8093 DB9 Serial*/ /* Epson products */ product EPSON PRINTER1 0x0001 USB Printer product EPSON PRINTER2 0x0002 ISD USB Smart Cable for Mac product EPSON PRINTER3 0x0003 ISD USB Smart Cable product EPSON PRINTER5 0x0005 USB Printer product EPSON 636 0x0101 Perfection 636U / 636Photo scanner product EPSON 610 0x0103 Perfection 610 scanner product EPSON 1200 0x0104 Perfection 1200U / 1200Photo scanner product EPSON 1600 0x0107 Expression 1600 scanner product EPSON 1640 0x010a Perfection 1640SU scanner product EPSON 1240 0x010b Perfection 1240U / 1240Photo scanner product EPSON 640U 0x010c Perfection 640U scanner product EPSON 1250 0x010f Perfection 1250U / 1250Photo scanner product EPSON 1650 0x0110 Perfection 1650 scanner product EPSON GT9700F 0x0112 GT-9700F scanner product EPSON GT9300UF 0x011b GT-9300UF scanner product EPSON 3200 0x011c Perfection 3200 scanner product EPSON 1260 0x011d Perfection 1260 scanner product EPSON 1660 0x011e Perfection 1660 scanner product EPSON 1670 0x011f Perfection 1670 scanner product EPSON 1270 0x0120 Perfection 1270 scanner product EPSON 2480 0x0121 Perfection 2480 scanner product EPSON 3590 0x0122 Perfection 3590 scanner product EPSON 4990 0x012a Perfection 4990 Photo scanner product EPSON CRESSI_EDY 0x0521 Cressi Edy diving computer product EPSON N2ITION3 0x0522 Zeagle N2iTion3 diving computer product EPSON STYLUS_875DC 0x0601 Stylus Photo 875DC Card Reader product EPSON STYLUS_895 0x0602 Stylus Photo 895 Card Reader product EPSON CX5400 0x0808 CX5400 scanner product EPSON 3500 0x080e CX-3500/3600/3650 MFP product EPSON RX425 0x080f Stylus Photo RX425 scanner product EPSON DX3800 0x0818 CX3700/CX3800/DX38x0 MFP scanner product EPSON 4800 0x0819 CX4700/CX4800/DX48x0 MFP scanner product EPSON 4200 0x0820 CX4100/CX4200/DX4200 MFP scanner product EPSON 5000 0x082b CX4900/CX5000/DX50x0 MFP scanner product EPSON 6000 0x082e CX5900/CX6000/DX60x0 MFP scanner product EPSON DX4000 0x082f DX4000 MFP scanner product EPSON DX7400 0x0838 CX7300/CX7400/DX7400 MFP scanner product EPSON DX8400 0x0839 CX8300/CX8400/DX8400 MFP scanner product EPSON SX100 0x0841 SX100/NX100 MFP scanner product EPSON NX300 0x0848 NX300 MFP scanner product EPSON SX200 0x0849 SX200/SX205 MFP scanner product EPSON SX400 0x084a SX400/NX400/TX400 MFP scanner /* e-TEK Labs products */ product ETEK 1COM 0x8007 Serial /* Evolution products */ product EVOLUTION ER1 0x0300 FTDI compatible adapter product EVOLUTION HYBRID 0x0302 FTDI compatible adapter product EVOLUTION RCM4 0x0303 FTDI compatible adapter /* Extended Systems products */ product EXTENDED XTNDACCESS 0x0100 XTNDAccess IrDA /* Falcom products */ product FALCOM TWIST 0x0001 USB GSM/GPRS Modem product FALCOM SAMBA 0x0005 FTDI compatible adapter /* FEIYA products */ product FEIYA DUMMY 0x0000 Dummy product product FEIYA 5IN1 0x1132 5-in-1 Card Reader product FEIYA ELANGO 0x6200 MicroSDHC Card Reader product FEIYA AC110 0x6300 AC-110 Card Reader /* FeiXun Communication products */ product FEIXUN RTL8188CU 0x0090 RTL8188CU product FEIXUN RTL8192CU 0x0091 RTL8192CU /* Festo */ product FESTO CPX_USB 0x0102 CPX-USB product FESTO CMSP 0x0501 CMSP /* Fiberline */ product FIBERLINE WL430U 0x6003 WL-430U /* FIC / OpenMoko */ product FIC NEO1973_DEBUG 0x5118 FTDI compatible adapter /* Fossil, Inc products */ product FOSSIL WRISTPDA 0x0002 Wrist PDA /* Foxconn products */ product FOXCONN TCOM_TC_300 0xe000 T-Com TC 300 product FOXCONN PIRELLI_DP_L10 0xe003 Pirelli DP-L10 /* Freecom products */ product FREECOM DVD 0xfc01 DVD drive product FREECOM HDD 0xfc05 Classic SL Hard Drive /* Fujitsu Siemens Computers products */ product FSC E5400 0x1009 PrismGT USB 2.0 WLAN /* Future Technology Devices products */ product FTDI SCX8_USB_PHOENIX 0x5259 SCx8 USB Phoenix interface product FTDI SERIAL_8U100AX 0x8372 8U100AX Serial product FTDI SERIAL_8U232AM 0x6001 8U232AM Serial product FTDI SERIAL_8U232AM4 0x6004 8U232AM Serial product FTDI SERIAL_232RL 0x6006 FT232RL Serial product FTDI SERIAL_2232C 0x6010 FT2232C Dual port Serial product FTDI 232H 0x6014 FTDI compatible adapter product FTDI 232EX 0x6015 FTDI compatible adapter product FTDI SERIAL_2232D 0x9e90 FT2232D Dual port Serial product FTDI SERIAL_4232H 0x6011 FT4232H Quad port Serial product FTDI XDS100V2 0xa6d0 TI XDS100V1/V2 and early Beaglebones product FTDI XDS100V3 0xa6d1 TI XDS100V3 product FTDI KTLINK 0xbbe2 KT-LINK Embedded Hackers Multitool product FTDI TURTELIZER2 0xbdc8 egnite Turtelizer 2 JTAG/RS232 Adapter /* Gude Analog- und Digitalsysteme products also uses FTDI's id: */ product FTDI TACTRIX_OPENPORT_13M 0xcc48 OpenPort 1.3 Mitsubishi product FTDI TACTRIX_OPENPORT_13S 0xcc49 OpenPort 1.3 Subaru product FTDI TACTRIX_OPENPORT_13U 0xcc4a OpenPort 1.3 Universal product FTDI GAMMASCOUT 0xd678 Gamma-Scout product FTDI KBS 0xe6c8 Pyramid KBS USB LCD product FTDI EISCOU 0xe888 Expert ISDN Control USB product FTDI UOPTBR 0xe889 USB-RS232 OptoBridge product FTDI EMCU2D 0xe88a Expert mouseCLOCK USB II product FTDI PCMSFU 0xe88b Precision Clock MSF USB product FTDI EMCU2H 0xe88c Expert mouseCLOCK USB II HBG product FTDI MAXSTREAM 0xee18 Maxstream PKG-U product FTDI USB_UIRT 0xf850 USB-UIRT product FTDI USBSERIAL 0xfa00 Matrix Orbital USB Serial product FTDI MX2_3 0xfa01 Matrix Orbital MX2 or MX3 product FTDI MX4_5 0xfa02 Matrix Orbital MX4 or MX5 product FTDI LK202 0xfa03 Matrix Orbital VK/LK202 Family product FTDI LK204 0xfa04 Matrix Orbital VK/LK204 Family product FTDI CFA_632 0xfc08 Crystalfontz CFA-632 USB LCD product FTDI CFA_634 0xfc09 Crystalfontz CFA-634 USB LCD product FTDI CFA_633 0xfc0b Crystalfontz CFA-633 USB LCD product FTDI CFA_631 0xfc0c Crystalfontz CFA-631 USB LCD product FTDI CFA_635 0xfc0d Crystalfontz CFA-635 USB LCD product FTDI SEMC_DSS20 0xfc82 SEMC DSS-20 SyncStation /* Commerzielle und Technische Informationssysteme GmbH products */ product FTDI CTI_USB_NANO_485 0xf60b CTI USB-Nano 485 product FTDI CTI_USB_MINI_485 0xf608 CTI USB-Mini 485 /* Other products */ product FTDI 232RL 0xfbfa FTDI compatible adapter product FTDI 4N_GALAXY_DE_1 0xf3c0 FTDI compatible adapter product FTDI 4N_GALAXY_DE_2 0xf3c1 FTDI compatible adapter product FTDI 4N_GALAXY_DE_3 0xf3c2 FTDI compatible adapter product FTDI 8U232AM_ALT 0x6006 FTDI compatible adapter product FTDI ACCESSO 0xfad0 FTDI compatible adapter product FTDI ACG_HFDUAL 0xdd20 FTDI compatible adapter product FTDI ACTIVE_ROBOTS 0xe548 FTDI compatible adapter product FTDI ACTZWAVE 0xf2d0 FTDI compatible adapter product FTDI AMC232 0xff00 FTDI compatible adapter product FTDI ARTEMIS 0xdf28 FTDI compatible adapter product FTDI ASK_RDR400 0xc991 FTDI compatible adapter product FTDI ATIK_ATK16 0xdf30 FTDI compatible adapter product FTDI ATIK_ATK16C 0xdf32 FTDI compatible adapter product FTDI ATIK_ATK16HR 0xdf31 FTDI compatible adapter product FTDI ATIK_ATK16HRC 0xdf33 FTDI compatible adapter product FTDI ATIK_ATK16IC 0xdf35 FTDI compatible adapter product FTDI BCS_SE923 0xfb99 FTDI compatible adapter product FTDI CANDAPTER 0x9f80 FTDI compatible adapter product FTDI CANUSB 0xffa8 FTDI compatible adapter product FTDI CCSICDU20_0 0xf9d0 FTDI compatible adapter product FTDI CCSICDU40_1 0xf9d1 FTDI compatible adapter product FTDI CCSICDU64_4 0xf9d4 FTDI compatible adapter product FTDI CCSLOAD_N_GO_3 0xf9d3 FTDI compatible adapter product FTDI CCSMACHX_2 0xf9d2 FTDI compatible adapter product FTDI CCSPRIME8_5 0xf9d5 FTDI compatible adapter product FTDI CHAMSYS_24_MASTER_WING 0xdaf8 FTDI compatible adapter product FTDI CHAMSYS_MAXI_WING 0xdafd FTDI compatible adapter product FTDI CHAMSYS_MEDIA_WING 0xdafe FTDI compatible adapter product FTDI CHAMSYS_MIDI_TIMECODE 0xdafb FTDI compatible adapter product FTDI CHAMSYS_MINI_WING 0xdafc FTDI compatible adapter product FTDI CHAMSYS_PC_WING 0xdaf9 FTDI compatible adapter product FTDI CHAMSYS_USB_DMX 0xdafa FTDI compatible adapter product FTDI CHAMSYS_WING 0xdaff FTDI compatible adapter product FTDI COM4SM 0xd578 FTDI compatible adapter product FTDI CONVERTER_0 0xd388 FTDI compatible adapter product FTDI CONVERTER_1 0xd389 FTDI compatible adapter product FTDI CONVERTER_2 0xd38a FTDI compatible adapter product FTDI CONVERTER_3 0xd38b FTDI compatible adapter product FTDI CONVERTER_4 0xd38c FTDI compatible adapter product FTDI CONVERTER_5 0xd38d FTDI compatible adapter product FTDI CONVERTER_6 0xd38e FTDI compatible adapter product FTDI CONVERTER_7 0xd38f FTDI compatible adapter product FTDI DMX4ALL 0xc850 FTDI compatible adapter product FTDI DOMINTELL_DGQG 0xef50 FTDI compatible adapter product FTDI DOMINTELL_DUSB 0xef51 FTDI compatible adapter product FTDI DOTEC 0x9868 FTDI compatible adapter product FTDI ECLO_COM_1WIRE 0xea90 FTDI compatible adapter product FTDI ECO_PRO_CDS 0xe520 FTDI compatible adapter product FTDI ELSTER_UNICOM 0xe700 FTDI compatible adapter product FTDI ELV_ALC8500 0xf06e FTDI compatible adapter product FTDI ELV_CLI7000 0xfb59 FTDI compatible adapter product FTDI ELV_CSI8 0xe0f0 FTDI compatible adapter product FTDI ELV_EC3000 0xe006 FTDI compatible adapter product FTDI ELV_EM1000DL 0xe0f1 FTDI compatible adapter product FTDI ELV_EM1010PC 0xe0ef FTDI compatible adapter product FTDI ELV_FEM 0xe00a FTDI compatible adapter product FTDI ELV_FHZ1000PC 0xf06f FTDI compatible adapter product FTDI ELV_FHZ1300PC 0xe0e8 FTDI compatible adapter product FTDI ELV_FM3RX 0xe0ed FTDI compatible adapter product FTDI ELV_FS20SIG 0xe0f4 FTDI compatible adapter product FTDI ELV_HS485 0xe0ea FTDI compatible adapter product FTDI ELV_KL100 0xe002 FTDI compatible adapter product FTDI ELV_MSM1 0xe001 FTDI compatible adapter product FTDI ELV_PCD200 0xf06c FTDI compatible adapter product FTDI ELV_PCK100 0xe0f2 FTDI compatible adapter product FTDI ELV_PPS7330 0xfb5c FTDI compatible adapter product FTDI ELV_RFP500 0xe0f3 FTDI compatible adapter product FTDI ELV_T1100 0xf06b FTDI compatible adapter product FTDI ELV_TFD128 0xe0ec FTDI compatible adapter product FTDI ELV_TFM100 0xfb5d FTDI compatible adapter product FTDI ELV_TWS550 0xe009 FTDI compatible adapter product FTDI ELV_UAD8 0xf068 FTDI compatible adapter product FTDI ELV_UDA7 0xf069 FTDI compatible adapter product FTDI ELV_UDF77 0xfb5e FTDI compatible adapter product FTDI ELV_UIO88 0xfb5f FTDI compatible adapter product FTDI ELV_ULA200 0xf06d FTDI compatible adapter product FTDI ELV_UM100 0xfb5a FTDI compatible adapter product FTDI ELV_UMS100 0xe0eb FTDI compatible adapter product FTDI ELV_UO100 0xfb5b FTDI compatible adapter product FTDI ELV_UR100 0xfb58 FTDI compatible adapter product FTDI ELV_USI2 0xf06a FTDI compatible adapter product FTDI ELV_USR 0xe000 FTDI compatible adapter product FTDI ELV_UTP8 0xe0f5 FTDI compatible adapter product FTDI ELV_WS300PC 0xe0f6 FTDI compatible adapter product FTDI ELV_WS444PC 0xe0f7 FTDI compatible adapter product FTDI ELV_WS500 0xe0e9 FTDI compatible adapter product FTDI ELV_WS550 0xe004 FTDI compatible adapter product FTDI ELV_WS777 0xe0ee FTDI compatible adapter product FTDI ELV_WS888 0xe008 FTDI compatible adapter product FTDI FUTURE_0 0xf44a FTDI compatible adapter product FTDI FUTURE_1 0xf44b FTDI compatible adapter product FTDI FUTURE_2 0xf44c FTDI compatible adapter product FTDI GENERIC 0x9378 FTDI compatible adapter product FTDI GUDEADS_E808 0xe808 FTDI compatible adapter product FTDI GUDEADS_E809 0xe809 FTDI compatible adapter product FTDI GUDEADS_E80A 0xe80a FTDI compatible adapter product FTDI GUDEADS_E80B 0xe80b FTDI compatible adapter product FTDI GUDEADS_E80C 0xe80c FTDI compatible adapter product FTDI GUDEADS_E80D 0xe80d FTDI compatible adapter product FTDI GUDEADS_E80E 0xe80e FTDI compatible adapter product FTDI GUDEADS_E80F 0xe80f FTDI compatible adapter product FTDI GUDEADS_E88D 0xe88d FTDI compatible adapter product FTDI GUDEADS_E88E 0xe88e FTDI compatible adapter product FTDI GUDEADS_E88F 0xe88f FTDI compatible adapter product FTDI HD_RADIO 0x937c FTDI compatible adapter product FTDI HO720 0xed72 FTDI compatible adapter product FTDI HO730 0xed73 FTDI compatible adapter product FTDI HO820 0xed74 FTDI compatible adapter product FTDI HO870 0xed71 FTDI compatible adapter product FTDI IBS_APP70 0xff3d FTDI compatible adapter product FTDI IBS_PCMCIA 0xff3a FTDI compatible adapter product FTDI IBS_PEDO 0xff3e FTDI compatible adapter product FTDI IBS_PICPRO 0xff39 FTDI compatible adapter product FTDI IBS_PK1 0xff3b FTDI compatible adapter product FTDI IBS_PROD 0xff3f FTDI compatible adapter product FTDI IBS_RS232MON 0xff3c FTDI compatible adapter product FTDI IBS_US485 0xff38 FTDI compatible adapter product FTDI IPLUS 0xd070 FTDI compatible adapter product FTDI IPLUS2 0xd071 FTDI compatible adapter product FTDI IRTRANS 0xfc60 FTDI compatible adapter product FTDI LENZ_LIUSB 0xd780 FTDI compatible adapter product FTDI LM3S_DEVEL_BOARD 0xbcd8 FTDI compatible adapter product FTDI LM3S_EVAL_BOARD 0xbcd9 FTDI compatible adapter product FTDI LM3S_ICDI_B_BOARD 0xbcda FTDI compatible adapter product FTDI MASTERDEVEL2 0xf449 FTDI compatible adapter product FTDI MHAM_DB9 0xeeed FTDI compatible adapter product FTDI MHAM_IC 0xeeec FTDI compatible adapter product FTDI MHAM_KW 0xeee8 FTDI compatible adapter product FTDI MHAM_RS232 0xeeee FTDI compatible adapter product FTDI MHAM_Y6 0xeeea FTDI compatible adapter product FTDI MHAM_Y8 0xeeeb FTDI compatible adapter product FTDI MHAM_Y9 0xeeef FTDI compatible adapter product FTDI MHAM_YS 0xeee9 FTDI compatible adapter product FTDI MICRO_CHAMELEON 0xcaa0 FTDI compatible adapter product FTDI MTXORB_5 0xfa05 FTDI compatible adapter product FTDI MTXORB_6 0xfa06 FTDI compatible adapter product FTDI NXTCAM 0xabb8 FTDI compatible adapter product FTDI OCEANIC 0xf460 FTDI compatible adapter product FTDI OOCDLINK 0xbaf8 FTDI compatible adapter product FTDI OPENDCC 0xbfd8 FTDI compatible adapter product FTDI OPENDCC_GATEWAY 0xbfdb FTDI compatible adapter product FTDI OPENDCC_GBM 0xbfdc FTDI compatible adapter product FTDI OPENDCC_SNIFFER 0xbfd9 FTDI compatible adapter product FTDI OPENDCC_THROTTLE 0xbfda FTDI compatible adapter product FTDI PCDJ_DAC2 0xfa88 FTDI compatible adapter product FTDI PERLE_ULTRAPORT 0xf0c0 FTDI compatible adapter product FTDI PHI_FISCO 0xe40b FTDI compatible adapter product FTDI PIEGROUP 0xf208 FTDI compatible adapter product FTDI PROPOX_JTAGCABLEII 0xd738 FTDI compatible adapter product FTDI R2000KU_TRUE_RNG 0xfb80 FTDI compatible adapter product FTDI R2X0 0xfc71 FTDI compatible adapter product FTDI RELAIS 0xfa10 FTDI compatible adapter product FTDI REU_TINY 0xed22 FTDI compatible adapter product FTDI RMP200 0xe729 FTDI compatible adapter product FTDI RM_CANVIEW 0xfd60 FTDI compatible adapter product FTDI RRCIRKITS_LOCOBUFFER 0xc7d0 FTDI compatible adapter product FTDI SCIENCESCOPE_HS_LOGBOOK 0xff1d FTDI compatible adapter product FTDI SCIENCESCOPE_LOGBOOKML 0xff18 FTDI compatible adapter product FTDI SCIENCESCOPE_LS_LOGBOOK 0xff1c FTDI compatible adapter product FTDI SCS_DEVICE_0 0xd010 FTDI compatible adapter product FTDI SCS_DEVICE_1 0xd011 FTDI compatible adapter product FTDI SCS_DEVICE_2 0xd012 FTDI compatible adapter product FTDI SCS_DEVICE_3 0xd013 FTDI compatible adapter product FTDI SCS_DEVICE_4 0xd014 FTDI compatible adapter product FTDI SCS_DEVICE_5 0xd015 FTDI compatible adapter product FTDI SCS_DEVICE_6 0xd016 FTDI compatible adapter product FTDI SCS_DEVICE_7 0xd017 FTDI compatible adapter product FTDI SDMUSBQSS 0xf448 FTDI compatible adapter product FTDI SIGNALYZER_SH2 0xbca2 FTDI compatible adapter product FTDI SIGNALYZER_SH4 0xbca4 FTDI compatible adapter product FTDI SIGNALYZER_SLITE 0xbca1 FTDI compatible adapter product FTDI SIGNALYZER_ST 0xbca0 FTDI compatible adapter product FTDI SPECIAL_1 0xfc70 FTDI compatible adapter product FTDI SPECIAL_3 0xfc72 FTDI compatible adapter product FTDI SPECIAL_4 0xfc73 FTDI compatible adapter product FTDI SPROG_II 0xf0c8 FTDI compatible adapter product FTDI SR_RADIO 0x9379 FTDI compatible adapter product FTDI SUUNTO_SPORTS 0xf680 FTDI compatible adapter product FTDI TAVIR_STK500 0xfa33 FTDI compatible adapter product FTDI TERATRONIK_D2XX 0xec89 FTDI compatible adapter product FTDI TERATRONIK_VCP 0xec88 FTDI compatible adapter product FTDI THORLABS 0xfaf0 FTDI compatible adapter product FTDI TNC_X 0xebe0 FTDI compatible adapter product FTDI TTUSB 0xff20 FTDI compatible adapter product FTDI USBX_707 0xf857 FTDI compatible adapter product FTDI USINT_CAT 0xb810 FTDI compatible adapter product FTDI USINT_RS232 0xb812 FTDI compatible adapter product FTDI USINT_WKEY 0xb811 FTDI compatible adapter product FTDI VARDAAN 0xf070 FTDI compatible adapter product FTDI VNHCPCUSB_D 0xfe38 FTDI compatible adapter product FTDI WESTREX_MODEL_777 0xdc00 FTDI compatible adapter product FTDI WESTREX_MODEL_8900F 0xdc01 FTDI compatible adapter product FTDI XF_547 0xfc0a FTDI compatible adapter product FTDI XF_640 0xfc0e FTDI compatible adapter product FTDI XF_642 0xfc0f FTDI compatible adapter product FTDI XM_RADIO 0x937a FTDI compatible adapter product FTDI YEI_SERVOCENTER31 0xe050 FTDI compatible adapter /* Fuji photo products */ product FUJIPHOTO MASS0100 0x0100 Mass Storage /* Fujitsu protducts */ product FUJITSU AH_F401U 0x105b AH-F401U Air H device /* Fujitsu-Siemens protducts */ product FUJITSUSIEMENS SCR 0x0009 Fujitsu-Siemens SCR USB Reader /* Garmin products */ +product GARMIN FORERUNNER230 0x086d ForeRunner 230 product GARMIN IQUE_3600 0x0004 iQue 3600 /* Gemalto products */ product GEMALTO PROXPU 0x5501 Prox-PU/CU RFID Card Reader /* General Instruments (Motorola) products */ product GENERALINSTMNTS SB5100 0x5100 SURFboard SB5100 Cable modem /* Genesys Logic products */ product GENESYS GL620USB 0x0501 GL620USB Host-Host interface product GENESYS GL650 0x0604 GL650 HUB product GENESYS GL606 0x0606 USB 2.0 HUB product GENESYS GL641USB 0x0700 GL641USB CompactFlash Card Reader product GENESYS GL641USB2IDE_2 0x0701 GL641USB USB-IDE Bridge No 2 product GENESYS GL641USB2IDE 0x0702 GL641USB USB-IDE Bridge product GENESYS GL641USB_2 0x0760 GL641USB 6-in-1 Card Reader /* GIGABYTE products */ product GIGABYTE GN54G 0x8001 GN-54G product GIGABYTE GNBR402W 0x8002 GN-BR402W product GIGABYTE GNWLBM101 0x8003 GN-WLBM101 product GIGABYTE GNWBKG 0x8007 GN-WBKG product GIGABYTE GNWB01GS 0x8008 GN-WB01GS product GIGABYTE GNWI05GS 0x800a GN-WI05GS /* Gigaset products */ product GIGASET WLAN 0x0701 WLAN product GIGASET SMCWUSBTG 0x0710 SMCWUSBT-G product GIGASET SMCWUSBTG_NF 0x0711 SMCWUSBT-G (no firmware) product GIGASET AR5523 0x0712 AR5523 product GIGASET AR5523_NF 0x0713 AR5523 (no firmware) product GIGASET RT2573 0x0722 RT2573 product GIGASET RT3070_1 0x0740 RT3070 product GIGASET RT3070_2 0x0744 RT3070 product GIGABYTE RT2870_1 0x800b RT2870 product GIGABYTE GNWB31N 0x800c GN-WB31N product GIGABYTE GNWB32L 0x800d GN-WB32L /* Global Sun Technology product */ product GLOBALSUN AR5523_1 0x7801 AR5523 product GLOBALSUN AR5523_1_NF 0x7802 AR5523 (no firmware) product GLOBALSUN AR5523_2 0x7811 AR5523 product GLOBALSUN AR5523_2_NF 0x7812 AR5523 (no firmware) /* Globespan products */ product GLOBESPAN PRISM_GT_1 0x2000 PrismGT USB 2.0 WLAN product GLOBESPAN PRISM_GT_2 0x2002 PrismGT USB 2.0 WLAN /* G.Mate, Inc products */ product GMATE YP3X00 0x1001 YP3X00 PDA /* GN Otometrics */ product GNOTOMETRICS USB 0x0010 FTDI compatible adapter /* GoHubs products */ product GOHUBS GOCOM232 0x1001 GoCOM232 Serial /* Good Way Technology products */ product GOODWAY GWUSB2E 0x6200 GWUSB2E product GOODWAY RT2573 0xc019 RT2573 /* Google products */ product GOOGLE NEXUSONE 0x4e11 Nexus One /* Gravis products */ product GRAVIS GAMEPADPRO 0x4001 GamePad Pro /* GREENHOUSE products */ product GREENHOUSE KANA21 0x0001 CF-writer with MP3 /* Griffin Technology */ product GRIFFIN IMATE 0x0405 iMate, ADB Adapter /* Guillemot Corporation */ product GUILLEMOT DALEADER 0xa300 DA Leader product GUILLEMOT HWGUSB254 0xe000 HWGUSB2-54 WLAN product GUILLEMOT HWGUSB254LB 0xe010 HWGUSB2-54-LB product GUILLEMOT HWGUSB254V2AP 0xe020 HWGUSB2-54V2-AP product GUILLEMOT HWNU300 0xe030 HWNU-300 product GUILLEMOT HWNUM300 0xe031 HWNUm-300 product GUILLEMOT HWGUN54 0xe032 HWGUn-54 product GUILLEMOT HWNUP150 0xe033 HWNUP-150 /* Hagiwara products */ product HAGIWARA FGSM 0x0002 FlashGate SmartMedia Card Reader product HAGIWARA FGCF 0x0003 FlashGate CompactFlash Card Reader product HAGIWARA FG 0x0005 FlashGate /* HAL Corporation products */ product HAL IMR001 0x0011 Crossam2+USB IR commander /* Handspring, Inc. */ product HANDSPRING VISOR 0x0100 Handspring Visor product HANDSPRING TREO 0x0200 Handspring Treo product HANDSPRING TREO600 0x0300 Handspring Treo 600 /* Hauppauge Computer Works */ product HAUPPAUGE WINTV_USB_FM 0x4d12 WinTV USB FM product HAUPPAUGE2 NOVAT500 0x9580 NovaT 500Stick /* Hawking Technologies products */ product HAWKING RT2870_1 0x0001 RT2870 product HAWKING RT2870_2 0x0003 RT2870 product HAWKING HWUN2 0x0009 HWUN2 product HAWKING RT3070 0x000b RT3070 product HAWKING RTL8192CU 0x0019 RTL8192CU product HAWKING UF100 0x400c 10/100 USB Ethernet product HAWKING RTL8192SU_1 0x0015 RTL8192SU product HAWKING RTL8192SU_2 0x0016 RTL8192SU /* HID Global GmbH products */ product HIDGLOBAL CM2020 0x0596 Omnikey Cardman 2020 product HIDGLOBAL CM6020 0x1784 Omnikey Cardman 6020 /* Hitachi, Ltd. products */ product HITACHI DVDCAM_DZ_MV100A 0x0004 DVD-CAM DZ-MV100A Camcorder product HITACHI DVDCAM_USB 0x001e DVDCAM USB HS Interface /* Holtek products */ product HOLTEK F85 0xa030 Holtek USB gaming keyboard /* HP products */ product HP 895C 0x0004 DeskJet 895C product HP 4100C 0x0101 Scanjet 4100C product HP S20 0x0102 Photosmart S20 product HP 880C 0x0104 DeskJet 880C product HP 4200C 0x0105 ScanJet 4200C product HP CDWRITERPLUS 0x0107 CD-Writer Plus product HP KBDHUB 0x010c Multimedia Keyboard Hub product HP G55XI 0x0111 OfficeJet G55xi product HP HN210W 0x011c HN210W 802.11b WLAN product HP 49GPLUS 0x0121 49g+ graphing calculator product HP 6200C 0x0201 ScanJet 6200C product HP S20b 0x0202 PhotoSmart S20 product HP 815C 0x0204 DeskJet 815C product HP 3300C 0x0205 ScanJet 3300C product HP CDW8200 0x0207 CD-Writer Plus 8200e product HP MMKEYB 0x020c Multimedia keyboard product HP 1220C 0x0212 DeskJet 1220C product HP UN2420_QDL 0x241d UN2420 QDL Firmware Loader product HP UN2420 0x251d UN2420 WWAN/GPS Module product HP 810C 0x0304 DeskJet 810C/812C product HP 4300C 0x0305 Scanjet 4300C product HP CDW4E 0x0307 CD-Writer+ CD-4e product HP G85XI 0x0311 OfficeJet G85xi product HP 1200 0x0317 LaserJet 1200 product HP 5200C 0x0401 Scanjet 5200C product HP 830C 0x0404 DeskJet 830C product HP 3400CSE 0x0405 ScanJet 3400cse product HP 6300C 0x0601 Scanjet 6300C product HP 840C 0x0604 DeskJet 840c product HP 2200C 0x0605 ScanJet 2200C product HP 5300C 0x0701 Scanjet 5300C product HP 4400C 0x0705 Scanjet 4400C product HP 4470C 0x0805 Scanjet 4470C product HP 82x0C 0x0b01 Scanjet 82x0C product HP 2300D 0x0b17 Laserjet 2300d product HP 970CSE 0x1004 Deskjet 970Cse product HP 5400C 0x1005 Scanjet 5400C product HP 2215 0x1016 iPAQ 22xx/Jornada 548 product HP 568J 0x1116 Jornada 568 product HP 930C 0x1204 DeskJet 930c product HP3 RTL8188CU 0x1629 RTL8188CU product HP P2000U 0x1801 Inkjet P-2000U product HP HS2300 0x1e1d HS2300 HSDPA (aka MC8775) product HP 640C 0x2004 DeskJet 640c product HP 4670V 0x3005 ScanJet 4670v product HP P1100 0x3102 Photosmart P1100 product HP LD220 0x3524 LD220 POS Display product HP OJ4215 0x3d11 OfficeJet 4215 product HP HN210E 0x811c Ethernet HN210E product HP2 C500 0x6002 PhotoSmart C500 product HP EV2200 0x1b1d ev2200 HSDPA (aka MC5720) product HP HS2300 0x1e1d hs2300 HSDPA (aka MC8775) /* HTC products */ product HTC WINMOBILE 0x00ce HTC USB Sync product HTC PPC6700MODEM 0x00cf PPC6700 Modem product HTC SMARTPHONE 0x0a51 SmartPhone USB Sync product HTC WIZARD 0x0bce HTC Wizard USB Sync product HTC LEGENDSYNC 0x0c97 HTC Legend USB Sync product HTC LEGEND 0x0ff9 HTC Legend product HTC LEGENDINTERNET 0x0ffe HTC Legend Internet Sharing /* HUAWEI products */ product HUAWEI MOBILE 0x1001 Huawei Mobile product HUAWEI E220 0x1003 HSDPA modem product HUAWEI E220BIS 0x1004 HSDPA modem product HUAWEI E1401 0x1401 3G modem product HUAWEI E1402 0x1402 3G modem product HUAWEI E1403 0x1403 3G modem product HUAWEI E1404 0x1404 3G modem product HUAWEI E1405 0x1405 3G modem product HUAWEI E1406 0x1406 3G modem product HUAWEI E1407 0x1407 3G modem product HUAWEI E1408 0x1408 3G modem product HUAWEI E1409 0x1409 3G modem product HUAWEI E140A 0x140a 3G modem product HUAWEI E140B 0x140b 3G modem product HUAWEI E180V 0x140c E180V product HUAWEI E140D 0x140d 3G modem product HUAWEI E140E 0x140e 3G modem product HUAWEI E140F 0x140f 3G modem product HUAWEI E1410 0x1410 3G modem product HUAWEI E1411 0x1411 3G modem product HUAWEI E1412 0x1412 3G modem product HUAWEI E1413 0x1413 3G modem product HUAWEI E1414 0x1414 3G modem product HUAWEI E1415 0x1415 3G modem product HUAWEI E1416 0x1416 3G modem product HUAWEI E1417 0x1417 3G modem product HUAWEI E1418 0x1418 3G modem product HUAWEI E1419 0x1419 3G modem product HUAWEI E141A 0x141a 3G modem product HUAWEI E141B 0x141b 3G modem product HUAWEI E141C 0x141c 3G modem product HUAWEI E141D 0x141d 3G modem product HUAWEI E141E 0x141e 3G modem product HUAWEI E141F 0x141f 3G modem product HUAWEI E1420 0x1420 3G modem product HUAWEI E1421 0x1421 3G modem product HUAWEI E1422 0x1422 3G modem product HUAWEI E1423 0x1423 3G modem product HUAWEI E1424 0x1424 3G modem product HUAWEI E1425 0x1425 3G modem product HUAWEI E1426 0x1426 3G modem product HUAWEI E1427 0x1427 3G modem product HUAWEI E1428 0x1428 3G modem product HUAWEI E1429 0x1429 3G modem product HUAWEI E142A 0x142a 3G modem product HUAWEI E142B 0x142b 3G modem product HUAWEI E142C 0x142c 3G modem product HUAWEI E142D 0x142d 3G modem product HUAWEI E142E 0x142e 3G modem product HUAWEI E142F 0x142f 3G modem product HUAWEI E1430 0x1430 3G modem product HUAWEI E1431 0x1431 3G modem product HUAWEI E1432 0x1432 3G modem product HUAWEI E1433 0x1433 3G modem product HUAWEI E1434 0x1434 3G modem product HUAWEI E1435 0x1435 3G modem product HUAWEI E1436 0x1436 3G modem product HUAWEI E1437 0x1437 3G modem product HUAWEI E1438 0x1438 3G modem product HUAWEI E1439 0x1439 3G modem product HUAWEI E143A 0x143a 3G modem product HUAWEI E143B 0x143b 3G modem product HUAWEI E143C 0x143c 3G modem product HUAWEI E143D 0x143d 3G modem product HUAWEI E143E 0x143e 3G modem product HUAWEI E143F 0x143f 3G modem product HUAWEI E1752 0x1446 3G modem product HUAWEI K4505 0x1464 3G modem product HUAWEI K3765 0x1465 3G modem product HUAWEI E1820 0x14ac E1820 HSPA+ USB Slider product HUAWEI K3770 0x14c9 3G modem product HUAWEI K3772 0x14cf K3772 product HUAWEI K3770_INIT 0x14d1 K3770 Initial product HUAWEI E3131_INIT 0x14fe 3G modem initial product HUAWEI E392 0x1505 LTE modem product HUAWEI E3131 0x1506 3G modem product HUAWEI K3765_INIT 0x1520 K3765 Initial product HUAWEI K4505_INIT 0x1521 K4505 Initial product HUAWEI K3772_INIT 0x1526 K3772 Initial product HUAWEI E3272_INIT 0x155b LTE modem initial product HUAWEI ME909U 0x1573 LTE modem product HUAWEI R215_INIT 0x1582 LTE modem initial product HUAWEI R215 0x1588 LTE modem product HUAWEI ETS2055 0x1803 CDMA modem product HUAWEI E173 0x1c05 3G modem product HUAWEI E173_INIT 0x1c0b 3G modem initial product HUAWEI E3272 0x1c1e LTE modem /* HUAWEI 3com products */ product HUAWEI3COM WUB320G 0x0009 Aolynk WUB320g /* IBM Corporation */ product IBM USBCDROMDRIVE 0x4427 USB CD-ROM Drive /* Icom products */ product ICOM SP1 0x0004 FTDI compatible adapter product ICOM OPC_U_UC 0x0018 FTDI compatible adapter product ICOM RP2C1 0x0009 FTDI compatible adapter product ICOM RP2C2 0x000a FTDI compatible adapter product ICOM RP2D 0x000b FTDI compatible adapter product ICOM RP2KVR 0x0013 FTDI compatible adapter product ICOM RP2KVT 0x0012 FTDI compatible adapter product ICOM RP2VR 0x000d FTDI compatible adapter product ICOM RP2VT 0x000c FTDI compatible adapter product ICOM RP4KVR 0x0011 FTDI compatible adapter product ICOM RP4KVT 0x0010 FTDI compatible adapter /* ID-tech products */ product IDTECH IDT1221U 0x0300 FTDI compatible adapter /* Imagination Technologies products */ product IMAGINATION DBX1 0x2107 DBX1 DSP core /* Initio Corporation products */ product INITIO DUMMY 0x0000 Dummy product product INITIO INIC_1610P 0x1e40 USB to SATA Bridge /* Inside Out Networks products */ product INSIDEOUT EDGEPORT4 0x0001 EdgePort/4 serial ports /* In-System products */ product INSYSTEM F5U002 0x0002 Parallel printer product INSYSTEM ATAPI 0x0031 ATAPI Adapter product INSYSTEM ISD110 0x0200 IDE Adapter ISD110 product INSYSTEM ISD105 0x0202 IDE Adapter ISD105 product INSYSTEM USBCABLE 0x081a USB cable product INSYSTEM STORAGE_V2 0x5701 USB Storage Adapter V2 /* Intel products */ product INTEL EASYPC_CAMERA 0x0110 Easy PC Camera product INTEL TESTBOARD 0x9890 82930 test board product INTEL2 IRMH 0x0020 Integrated Rate Matching Hub product INTEL2 IRMH2 0x0024 Integrated Rate Matching Hub product INTEL2 IRMH3 0x8000 Integrated Rate Matching Hub product INTEL2 IRMH4 0x8008 Integrated Rate Matching Hub /* Interbiometric products */ product INTERBIOMETRICS IOBOARD 0x1002 FTDI compatible adapter product INTERBIOMETRICS MINI_IOBOARD 0x1006 FTDI compatible adapter /* Intersil products */ product INTERSIL PRISM_GT 0x1000 PrismGT USB 2.0 WLAN product INTERSIL PRISM_2X 0x3642 Prism2.x or Atmel WLAN /* Interpid Control Systems products */ product INTREPIDCS VALUECAN 0x0601 ValueCAN CAN bus interface product INTREPIDCS NEOVI 0x0701 NeoVI Blue vehicle bus interface /* I/O DATA products */ product IODATA IU_CD2 0x0204 DVD Multi-plus unit iU-CD2 product IODATA DVR_UEH8 0x0206 DVD Multi-plus unit DVR-UEH8 product IODATA USBSSMRW 0x0314 USB-SSMRW SD-card product IODATA USBSDRW 0x031e USB-SDRW SD-card product IODATA USBETT 0x0901 USB ETT product IODATA USBETTX 0x0904 USB ETTX product IODATA USBETTXS 0x0913 USB ETTX product IODATA USBWNB11A 0x0919 USB WN-B11 product IODATA USBWNB11 0x0922 USB Airport WN-B11 product IODATA ETGUS2 0x0930 ETG-US2 product IODATA WNGDNUS2 0x093f WN-GDN/US2 product IODATA RT3072_1 0x0944 RT3072 product IODATA RT3072_2 0x0945 RT3072 product IODATA RT3072_3 0x0947 RT3072 product IODATA RT3072_4 0x0948 RT3072 product IODATA USBRSAQ 0x0a03 Serial USB-RSAQ1 product IODATA USBRSAQ5 0x0a0e Serial USB-RSAQ5 product IODATA2 USB2SC 0x0a09 USB2.0-SCSI Bridge USB2-SC /* Iomega products */ product IOMEGA ZIP100 0x0001 Zip 100 product IOMEGA ZIP250 0x0030 Zip 250 /* Ionic products */ product IONICS PLUGCOMPUTER 0x0102 FTDI compatible adapter /* Integrated System Solution Corp. products */ product ISSC ISSCBTA 0x1001 Bluetooth USB Adapter /* iTegno products */ product ITEGNO WM1080A 0x1080 WM1080A GSM/GPRS modem product ITEGNO WM2080A 0x2080 WM2080A CDMA modem /* Ituner networks products */ product ITUNERNET USBLCD2X20 0x0002 USB-LCD 2x20 product ITUNERNET USBLCD4X20 0xc001 USB-LCD 4x20 /* Jablotron products */ product JABLOTRON PC60B 0x0001 PC-60B /* Jaton products */ product JATON EDA 0x5704 Ethernet /* Jeti products */ product JETI SPC1201 0x04b2 FTDI compatible adapter /* JMicron products */ product JMICRON JM20336 0x2336 USB to SATA Bridge product JMICRON JM20337 0x2338 USB to ATA/ATAPI Bridge /* JVC products */ product JVC GR_DX95 0x000a GR-DX95 product JVC MP_PRX1 0x3008 MP-PRX1 Ethernet /* JRC products */ product JRC AH_J3001V_J3002V 0x0001 AirH PHONE AH-J3001V/J3002V /* Kamstrrup products */ product KAMSTRUP OPTICALEYE 0x0001 Optical Eye/3-wire product KAMSTRUP MBUS_250D 0x0005 M-Bus Master MultiPort 250D /* Kawatsu products */ product KAWATSU MH4000P 0x0003 MiniHub 4000P /* Keisokugiken Corp. products */ product KEISOKUGIKEN USBDAQ 0x0068 HKS-0200 USBDAQ /* Kensington products */ product KENSINGTON ORBIT 0x1003 Orbit USB/PS2 trackball product KENSINGTON TURBOBALL 0x1005 TurboBall /* Keyspan products */ product KEYSPAN USA28_NF 0x0101 USA-28 serial Adapter (no firmware) product KEYSPAN USA28X_NF 0x0102 USA-28X serial Adapter (no firmware) product KEYSPAN USA19_NF 0x0103 USA-19 serial Adapter (no firmware) product KEYSPAN USA18_NF 0x0104 USA-18 serial Adapter (no firmware) product KEYSPAN USA18X_NF 0x0105 USA-18X serial Adapter (no firmware) product KEYSPAN USA19W_NF 0x0106 USA-19W serial Adapter (no firmware) product KEYSPAN USA19 0x0107 USA-19 serial Adapter product KEYSPAN USA19W 0x0108 USA-19W serial Adapter product KEYSPAN USA49W_NF 0x0109 USA-49W serial Adapter (no firmware) product KEYSPAN USA49W 0x010a USA-49W serial Adapter product KEYSPAN USA19QI_NF 0x010b USA-19QI serial Adapter (no firmware) product KEYSPAN USA19QI 0x010c USA-19QI serial Adapter product KEYSPAN USA19Q_NF 0x010d USA-19Q serial Adapter (no firmware) product KEYSPAN USA19Q 0x010e USA-19Q serial Adapter product KEYSPAN USA28 0x010f USA-28 serial Adapter product KEYSPAN USA28XXB 0x0110 USA-28X/XB serial Adapter product KEYSPAN USA18 0x0111 USA-18 serial Adapter product KEYSPAN USA18X 0x0112 USA-18X serial Adapter product KEYSPAN USA28XB_NF 0x0113 USA-28XB serial Adapter (no firmware) product KEYSPAN USA28XA_NF 0x0114 USA-28XB serial Adapter (no firmware) product KEYSPAN USA28XA 0x0115 USA-28XA serial Adapter product KEYSPAN USA18XA_NF 0x0116 USA-18XA serial Adapter (no firmware) product KEYSPAN USA18XA 0x0117 USA-18XA serial Adapter product KEYSPAN USA19QW_NF 0x0118 USA-19WQ serial Adapter (no firmware) product KEYSPAN USA19QW 0x0119 USA-19WQ serial Adapter product KEYSPAN USA19HA 0x0121 USA-19HS serial Adapter product KEYSPAN UIA10 0x0201 UIA-10 remote control product KEYSPAN UIA11 0x0202 UIA-11 remote control /* Kingston products */ product KINGSTON XX1 0x0008 Ethernet product KINGSTON KNU101TX 0x000a KNU101TX USB Ethernet product KINGSTON HYPERX3_0 0x162b DT HyperX 3.0 /* Kawasaki products */ product KLSI DUH3E10BT 0x0008 USB Ethernet product KLSI DUH3E10BTN 0x0009 USB Ethernet /* Kobil products */ product KOBIL CONV_B1 0x2020 FTDI compatible adapter product KOBIL CONV_KAAN 0x2021 FTDI compatible adapter /* Kodak products */ product KODAK DC220 0x0100 Digital Science DC220 product KODAK DC260 0x0110 Digital Science DC260 product KODAK DC265 0x0111 Digital Science DC265 product KODAK DC290 0x0112 Digital Science DC290 product KODAK DC240 0x0120 Digital Science DC240 product KODAK DC280 0x0130 Digital Science DC280 /* Kontron AG products */ product KONTRON DM9601 0x8101 USB Ethernet product KONTRON JP1082 0x9700 USB Ethernet /* Konica Corp. Products */ product KONICA CAMERA 0x0720 Digital Color Camera /* KYE products */ product KYE NICHE 0x0001 Niche mouse product KYE NETSCROLL 0x0003 Genius NetScroll mouse product KYE FLIGHT2000 0x1004 Flight 2000 joystick product KYE VIVIDPRO 0x2001 ColorPage Vivid-Pro scanner /* Kyocera products */ product KYOCERA FINECAM_S3X 0x0100 Finecam S3x product KYOCERA FINECAM_S4 0x0101 Finecam S4 product KYOCERA FINECAM_S5 0x0103 Finecam S5 product KYOCERA FINECAM_L3 0x0105 Finecam L3 product KYOCERA AHK3001V 0x0203 AH-K3001V product KYOCERA2 CDMA_MSM_K 0x17da Qualcomm Kyocera CDMA Technologies MSM product KYOCERA2 KPC680 0x180a Qualcomm Kyocera CDMA Technologies MSM /* LaCie products */ product LACIE HD 0xa601 Hard Disk product LACIE CDRW 0xa602 CD R/W /* Lake Shore Cryotronics products */ product LAKESHORE 121 0x0100 121 Current Source product LAKESHORE 218A 0x0200 218A Temperature Monitor product LAKESHORE 219 0x0201 219 Temperature Monitor product LAKESHORE 233 0x0202 233 Temperature Transmitter product LAKESHORE 235 0x0203 235 Temperature Transmitter product LAKESHORE 335 0x0300 335 Temperature Controller product LAKESHORE 336 0x0301 336 Temperature Controller product LAKESHORE 350 0x0302 350 Temperature Controller product LAKESHORE 371 0x0303 371 AC Bridge product LAKESHORE 411 0x0400 411 Handheld Gaussmeter product LAKESHORE 425 0x0401 425 Gaussmeter product LAKESHORE 455A 0x0402 455A DSP Gaussmeter product LAKESHORE 475A 0x0403 475A DSP Gaussmeter product LAKESHORE 465 0x0404 465 Gaussmeter product LAKESHORE 625A 0x0600 625A Magnet PSU product LAKESHORE 642A 0x0601 642A Magnet PSU product LAKESHORE 648 0x0602 648 Magnet PSU product LAKESHORE 737 0x0700 737 VSM Controller product LAKESHORE 776 0x0701 776 Matrix Switch /* Larsen and Brusgaard products */ product LARSENBRUSGAARD ALTITRACK 0x0001 FTDI compatible adapter /* Leadtek products */ product LEADTEK 9531 0x2101 9531 GPS /* Lenovo products */ product LENOVO GIGALAN 0x304b USB 3.0 Ethernet product LENOVO ETHERNET 0x7203 USB 2.0 Ethernet /* Lexar products */ product LEXAR JUMPSHOT 0x0001 jumpSHOT CompactFlash Reader product LEXAR CF_READER 0xb002 USB CF Reader product LEXAR JUMPDRIVE 0xa833 USB Jumpdrive Flash Drive /* Lexmark products */ product LEXMARK S2450 0x0009 Optra S 2450 /* Liebert products */ product LIEBERT POWERSURE_PXT 0xffff PowerSure Personal XT product LIEBERT2 PSI1000 0x0004 UPS PSI 1000 FW:08 /* Link Instruments Inc. products */ product LINKINSTRUMENTS MSO19 0xf190 Link Instruments MSO-19 product LINKINSTRUMENTS MSO28 0xf280 Link Instruments MSO-28 product LINKINSTRUMENTS MSO28_2 0xf281 Link Instruments MSO-28 /* Linksys products */ product LINKSYS MAUSB2 0x0105 Camedia MAUSB-2 product LINKSYS USB10TX1 0x200c USB10TX product LINKSYS USB10T 0x2202 USB10T Ethernet product LINKSYS USB100TX 0x2203 USB100TX Ethernet product LINKSYS USB100H1 0x2204 USB100H1 Ethernet/HPNA product LINKSYS USB10TA 0x2206 USB10TA Ethernet product LINKSYS USB10TX2 0x400b USB10TX product LINKSYS2 WUSB11 0x2219 WUSB11 Wireless Adapter product LINKSYS2 USB200M 0x2226 USB 2.0 10/100 Ethernet product LINKSYS3 WUSB11v28 0x2233 WUSB11 v2.8 Wireless Adapter product LINKSYS4 USB1000 0x0039 USB1000 product LINKSYS4 WUSB100 0x0070 WUSB100 product LINKSYS4 WUSB600N 0x0071 WUSB600N product LINKSYS4 WUSB54GCV2 0x0073 WUSB54GC v2 product LINKSYS4 WUSB54GCV3 0x0077 WUSB54GC v3 product LINKSYS4 RT3070 0x0078 RT3070 product LINKSYS4 WUSB600NV2 0x0079 WUSB600N v2 /* Logilink products */ product LOGILINK DUMMY 0x0000 Dummy product product LOGILINK U2M 0x0101 LogiLink USB MIDI Cable /* Logitech products */ product LOGITECH M2452 0x0203 M2452 keyboard product LOGITECH M4848 0x0301 M4848 mouse product LOGITECH PAGESCAN 0x040f PageScan product LOGITECH QUICKCAMWEB 0x0801 QuickCam Web product LOGITECH QUICKCAMPRO 0x0810 QuickCam Pro product LOGITECH WEBCAMC100 0X0817 Webcam C100 product LOGITECH QUICKCAMEXP 0x0840 QuickCam Express product LOGITECH QUICKCAM 0x0850 QuickCam product LOGITECH QUICKCAMPRO3 0x0990 QuickCam Pro 9000 product LOGITECH N43 0xc000 N43 product LOGITECH N48 0xc001 N48 mouse product LOGITECH MBA47 0xc002 M-BA47 mouse product LOGITECH WMMOUSE 0xc004 WingMan Gaming Mouse product LOGITECH BD58 0xc00c BD58 mouse product LOGITECH UN58A 0xc030 iFeel Mouse product LOGITECH UN53B 0xc032 iFeel MouseMan product LOGITECH WMPAD 0xc208 WingMan GamePad Extreme product LOGITECH WMRPAD 0xc20a WingMan RumblePad product LOGITECH WMJOY 0xc281 WingMan Force joystick product LOGITECH BB13 0xc401 USB-PS/2 Trackball product LOGITECH RK53 0xc501 Cordless mouse product LOGITECH RB6 0xc503 Cordless keyboard product LOGITECH MX700 0xc506 Cordless optical mouse product LOGITECH QUICKCAMPRO2 0xd001 QuickCam Pro /* Logitec Corp. products */ product LOGITEC LDR_H443SU2 0x0033 DVD Multi-plus unit LDR-H443SU2 product LOGITEC LDR_H443U2 0x00b3 DVD Multi-plus unit LDR-H443U2 product LOGITEC LAN_GTJU2A 0x0160 LAN-GTJ/U2A Ethernet product LOGITEC RT2870_1 0x0162 RT2870 product LOGITEC RT2870_2 0x0163 RT2870 product LOGITEC RT2870_3 0x0164 RT2870 product LOGITEC LANW300NU2 0x0166 LAN-W300N/U2 product LOGITEC LANW150NU2 0x0168 LAN-W150N/U2 product LOGITEC LANW300NU2S 0x0169 LAN-W300N/U2S /* Longcheer Holdings, Ltd. products */ product LONGCHEER WM66 0x6061 Longcheer WM66 HSDPA product LONGCHEER W14 0x9603 Mobilcom W14 product LONGCHEER DISK 0xf000 Driver disk product LONGCHEER XSSTICK 0x9605 4G Systems XSStick P14 /* Lucent products */ product LUCENT EVALKIT 0x1001 USS-720 evaluation kit /* Luwen products */ product LUWEN EASYDISK 0x0005 EasyDisc /* Macally products */ product MACALLY MOUSE1 0x0101 mouse /* Mag-Tek products */ product MAGTEK USBSWIPE 0x0002 USB Mag Stripe Swipe Reader /* Marvell Technology Group, Ltd. products */ product MARVELL SHEEVAPLUG 0x9e8f SheevaPlug serial interface /* Matrix Orbital products */ product MATRIXORBITAL FTDI_RANGE_0100 0x0100 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0101 0x0101 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0102 0x0102 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0103 0x0103 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0104 0x0104 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0105 0x0105 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0106 0x0106 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0107 0x0107 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0108 0x0108 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0109 0x0109 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_010A 0x010a FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_010B 0x010b FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_010C 0x010c FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_010D 0x010d FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_010E 0x010e FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_010F 0x010f FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0110 0x0110 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0111 0x0111 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0112 0x0112 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0113 0x0113 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0114 0x0114 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0115 0x0115 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0116 0x0116 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0117 0x0117 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0118 0x0118 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0119 0x0119 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_011A 0x011a FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_011B 0x011b FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_011C 0x011c FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_011D 0x011d FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_011E 0x011e FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_011F 0x011f FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0120 0x0120 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0121 0x0121 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0122 0x0122 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0123 0x0123 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0124 0x0124 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0125 0x0125 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0126 0x0126 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0128 0x0128 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0129 0x0129 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_012A 0x012a FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_012B 0x012b FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_012D 0x012d FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_012E 0x012e FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_012F 0x012f FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0130 0x0130 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0131 0x0131 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0132 0x0132 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0133 0x0133 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0134 0x0134 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0135 0x0135 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0136 0x0136 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0137 0x0137 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0138 0x0138 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0139 0x0139 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_013A 0x013a FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_013B 0x013b FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_013C 0x013c FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_013D 0x013d FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_013E 0x013e FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_013F 0x013f FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0140 0x0140 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0141 0x0141 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0142 0x0142 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0143 0x0143 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0144 0x0144 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0145 0x0145 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0146 0x0146 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0147 0x0147 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0148 0x0148 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0149 0x0149 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_014A 0x014a FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_014B 0x014b FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_014C 0x014c FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_014D 0x014d FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_014E 0x014e FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_014F 0x014f FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0150 0x0150 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0151 0x0151 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0152 0x0152 FTDI compatible adapter product MATRIXORBITAL MOUA 0x0153 Martrix Orbital MOU-Axxxx LCD displays product MATRIXORBITAL FTDI_RANGE_0159 0x0159 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_015A 0x015a FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_015B 0x015b FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_015C 0x015c FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_015D 0x015d FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_015E 0x015e FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_015F 0x015f FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0160 0x0160 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0161 0x0161 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0162 0x0162 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0163 0x0163 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0164 0x0164 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0165 0x0165 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0166 0x0166 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0167 0x0167 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0168 0x0168 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0169 0x0169 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_016A 0x016a FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_016B 0x016b FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_016C 0x016c FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_016D 0x016d FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_016E 0x016e FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_016F 0x016f FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0170 0x0170 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0171 0x0171 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0172 0x0172 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0173 0x0173 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0174 0x0174 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0175 0x0175 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0176 0x0176 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0177 0x0177 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0178 0x0178 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0179 0x0179 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_017A 0x017a FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_017B 0x017b FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_017C 0x017c FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_017D 0x017d FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_017E 0x017e FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_017F 0x017f FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0180 0x0180 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0181 0x0181 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0182 0x0182 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0183 0x0183 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0184 0x0184 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0185 0x0185 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0186 0x0186 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0187 0x0187 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0188 0x0188 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0189 0x0189 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_018A 0x018a FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_018B 0x018b FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_018C 0x018c FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_018D 0x018d FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_018E 0x018e FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_018F 0x018f FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0190 0x0190 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0191 0x0191 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0192 0x0192 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0193 0x0193 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0194 0x0194 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0195 0x0195 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0196 0x0196 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0197 0x0197 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0198 0x0198 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_0199 0x0199 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_019A 0x019a FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_019B 0x019b FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_019C 0x019c FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_019D 0x019d FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_019E 0x019e FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_019F 0x019f FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01A0 0x01a0 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01A1 0x01a1 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01A2 0x01a2 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01A3 0x01a3 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01A4 0x01a4 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01A5 0x01a5 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01A6 0x01a6 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01A7 0x01a7 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01A8 0x01a8 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01A9 0x01a9 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01AA 0x01aa FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01AB 0x01ab FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01AC 0x01ac FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01AD 0x01ad FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01AE 0x01ae FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01AF 0x01af FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01B0 0x01b0 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01B1 0x01b1 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01B2 0x01b2 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01B3 0x01b3 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01B4 0x01b4 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01B5 0x01b5 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01B6 0x01b6 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01B7 0x01b7 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01B8 0x01b8 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01B9 0x01b9 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01BA 0x01ba FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01BB 0x01bb FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01BC 0x01bc FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01BD 0x01bd FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01BE 0x01be FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01BF 0x01bf FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01C0 0x01c0 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01C1 0x01c1 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01C2 0x01c2 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01C3 0x01c3 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01C4 0x01c4 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01C5 0x01c5 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01C6 0x01c6 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01C7 0x01c7 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01C8 0x01c8 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01C9 0x01c9 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01CA 0x01ca FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01CB 0x01cb FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01CC 0x01cc FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01CD 0x01cd FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01CE 0x01ce FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01CF 0x01cf FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01D0 0x01d0 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01D1 0x01d1 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01D2 0x01d2 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01D3 0x01d3 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01D4 0x01d4 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01D5 0x01d5 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01D6 0x01d6 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01D7 0x01d7 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01D8 0x01d8 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01D9 0x01d9 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01DA 0x01da FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01DB 0x01db FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01DC 0x01dc FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01DD 0x01dd FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01DE 0x01de FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01DF 0x01df FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01E0 0x01e0 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01E1 0x01e1 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01E2 0x01e2 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01E3 0x01e3 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01E4 0x01e4 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01E5 0x01e5 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01E6 0x01e6 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01E7 0x01e7 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01E8 0x01e8 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01E9 0x01e9 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01EA 0x01ea FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01EB 0x01eb FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01EC 0x01ec FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01ED 0x01ed FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01EE 0x01ee FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01EF 0x01ef FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01F0 0x01f0 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01F1 0x01f1 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01F2 0x01f2 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01F3 0x01f3 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01F4 0x01f4 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01F5 0x01f5 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01F6 0x01f6 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01F7 0x01f7 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01F8 0x01f8 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01F9 0x01f9 FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01FA 0x01fa FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01FB 0x01fb FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01FC 0x01fc FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01FD 0x01fd FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01FE 0x01fe FTDI compatible adapter product MATRIXORBITAL FTDI_RANGE_01FF 0x01ff FTDI compatible adapter /* MCT Corp. */ product MCT HUB0100 0x0100 Hub product MCT DU_H3SP_USB232 0x0200 D-Link DU-H3SP USB BAY Hub product MCT USB232 0x0210 USB-232 Interface product MCT SITECOM_USB232 0x0230 Sitecom USB-232 Products /* Medeli */ product MEDELI DD305 0x5011 DD305 Digital Drum Set /* MediaTek, Inc. */ product MEDIATEK MTK3329 0x3329 MTK II GPS Receiver /* Meizu Electronics */ product MEIZU M6_SL 0x0140 MiniPlayer M6 (SL) /* Melco, Inc products */ product MELCO LUATX1 0x0001 LUA-TX Ethernet product MELCO LUATX5 0x0005 LUA-TX Ethernet product MELCO LUA2TX5 0x0009 LUA2-TX Ethernet product MELCO LUAKTX 0x0012 LUA-KTX Ethernet product MELCO DUBPXXG 0x001c DUB-PxxG product MELCO LUAU2KTX 0x003d LUA-U2-KTX Ethernet product MELCO KG54YB 0x005e WLI-U2-KG54-YB WLAN product MELCO KG54 0x0066 WLI-U2-KG54 WLAN product MELCO KG54AI 0x0067 WLI-U2-KG54-AI WLAN product MELCO LUA3U2AGT 0x006e LUA3-U2-AGT product MELCO NINWIFI 0x008b Nintendo Wi-Fi product MELCO PCOPRS1 0x00b3 PC-OP-RS1 RemoteStation product MELCO SG54HP 0x00d8 WLI-U2-SG54HP product MELCO G54HP 0x00d9 WLI-U2-G54HP product MELCO KG54L 0x00da WLI-U2-KG54L product MELCO WLIUCG300N 0x00e8 WLI-UC-G300N product MELCO SG54HG 0x00f4 WLI-U2-SG54HG product MELCO WLRUCG 0x0116 WLR-UC-G product MELCO WLRUCGAOSS 0x0119 WLR-UC-G-AOSS product MELCO WLIUCAG300N 0x012e WLI-UC-AG300N product MELCO WLIUCG 0x0137 WLI-UC-G product MELCO WLIUCG300HP 0x0148 WLI-UC-G300HP product MELCO RT2870_2 0x0150 RT2870 product MELCO WLIUCGN 0x015d WLI-UC-GN product MELCO WLIUCG301N 0x016f WLI-UC-G301N product MELCO WLIUCGNM 0x01a2 WLI-UC-GNM product MELCO WLIUCG300HPV1 0x01a8 WLI-UC-G300HP-V1 product MELCO WLIUCGNM2 0x01ee WLI-UC-GNM2 /* Merlin products */ product MERLIN V620 0x1110 Merlin V620 /* MetaGeek products */ product METAGEEK TELLSTICK 0x0c30 FTDI compatible adapter product METAGEEK WISPY1B 0x083e MetaGeek Wi-Spy product METAGEEK WISPY24X 0x083f MetaGeek Wi-Spy 2.4x product METAGEEK2 WISPYDBX 0x5000 MetaGeek Wi-Spy DBx /* Metricom products */ product METRICOM RICOCHET_GS 0x0001 Ricochet GS /* MGE UPS Systems */ product MGE UPS1 0x0001 MGE UPS SYSTEMS PROTECTIONCENTER 1 product MGE UPS2 0xffff MGE UPS SYSTEMS PROTECTIONCENTER 2 /* MEI products */ product MEI CASHFLOW_SC 0x1100 Cashflow-SC Cash Acceptor product MEI S2000 0x1101 Series 2000 Combo Acceptor /* Microdia / Sonix Techonology Co., Ltd. products */ product CHICONY2 YUREX 0x1010 YUREX product CHICONY2 CAM_1 0x62c0 CAM_1 product CHICONY2 TEMPER 0x7401 TEMPer sensor /* Micro Star International products */ product MSI BT_DONGLE 0x1967 Bluetooth USB dongle product MSI RT3070_1 0x3820 RT3070 product MSI RT3070_2 0x3821 RT3070 product MSI RT3070_8 0x3822 RT3070 product MSI RT3070_3 0x3870 RT3070 product MSI RT3070_9 0x3871 RT3070 product MSI UB11B 0x6823 UB11B product MSI RT2570 0x6861 RT2570 product MSI RT2570_2 0x6865 RT2570 product MSI RT2570_3 0x6869 RT2570 product MSI RT2573_1 0x6874 RT2573 product MSI RT2573_2 0x6877 RT2573 product MSI RT3070_4 0x6899 RT3070 product MSI RT3070_5 0x821a RT3070 product MSI RT3070_10 0x822a RT3070 product MSI RT3070_6 0x870a RT3070 product MSI RT3070_11 0x871a RT3070 product MSI RT3070_7 0x899a RT3070 product MSI RT2573_3 0xa861 RT2573 product MSI RT2573_4 0xa874 RT2573 /* Micron products */ product MICRON REALSSD 0x0655 Real SSD eUSB /* Microsoft products */ product MICROSOFT SIDEPREC 0x0008 SideWinder Precision Pro product MICROSOFT INTELLIMOUSE 0x0009 IntelliMouse product MICROSOFT NATURALKBD 0x000b Natural Keyboard Elite product MICROSOFT DDS80 0x0014 Digital Sound System 80 product MICROSOFT SIDEWINDER 0x001a Sidewinder Precision Racing Wheel product MICROSOFT INETPRO 0x001c Internet Keyboard Pro product MICROSOFT TBEXPLORER 0x0024 Trackball Explorer product MICROSOFT INTELLIEYE 0x0025 IntelliEye mouse product MICROSOFT INETPRO2 0x002b Internet Keyboard Pro product MICROSOFT INTELLIMOUSE5 0x0039 IntelliMouse 1.1 5-Button Mouse product MICROSOFT WHEELMOUSE 0x0040 Wheel Mouse Optical product MICROSOFT MN510 0x006e MN510 Wireless product MICROSOFT 700WX 0x0079 Palm 700WX product MICROSOFT MN110 0x007a 10/100 USB NIC product MICROSOFT WLINTELLIMOUSE 0x008c Wireless Optical IntelliMouse product MICROSOFT WLNOTEBOOK 0x00b9 Wireless Optical Mouse (Model 1023) product MICROSOFT COMFORT3000 0x00d1 Comfort Optical Mouse 3000 (Model 1043) product MICROSOFT WLNOTEBOOK3 0x00d2 Wireless Optical Mouse 3000 (Model 1049) product MICROSOFT NATURAL4000 0x00db Natural Ergonomic Keyboard 4000 product MICROSOFT WLNOTEBOOK2 0x00e1 Wireless Optical Mouse 3000 (Model 1056) product MICROSOFT XBOX360 0x0292 XBOX 360 WLAN /* Microtech products */ product MICROTECH SCSIDB25 0x0004 USB-SCSI-DB25 product MICROTECH SCSIHD50 0x0005 USB-SCSI-HD50 product MICROTECH DPCM 0x0006 USB CameraMate product MICROTECH FREECOM 0xfc01 Freecom USB-IDE /* Microtek products */ product MICROTEK 336CX 0x0094 Phantom 336CX - C3 scanner product MICROTEK X6U 0x0099 ScanMaker X6 - X6U product MICROTEK C6 0x009a Phantom C6 scanner product MICROTEK 336CX2 0x00a0 Phantom 336CX - C3 scanner product MICROTEK V6USL 0x00a3 ScanMaker V6USL product MICROTEK V6USL2 0x80a3 ScanMaker V6USL product MICROTEK V6UL 0x80ac ScanMaker V6UL /* Microtune, Inc. products */ product MICROTUNE BT_DONGLE 0x1000 Bluetooth USB dongle /* Midiman products */ product MAUDIO MIDISPORT2X2 0x1001 Midisport 2x2 product MAUDIO FASTTRACKULTRA 0x2080 Fast Track Ultra product MAUDIO FASTTRACKULTRA8R 0x2081 Fast Track Ultra 8R /* MindsAtWork products */ product MINDSATWORK WALLET 0x0001 Digital Wallet /* Minolta Co., Ltd. */ product MINOLTA 2300 0x4001 Dimage 2300 product MINOLTA S304 0x4007 Dimage S304 product MINOLTA X 0x4009 Dimage X product MINOLTA 5400 0x400e Dimage 5400 product MINOLTA F300 0x4011 Dimage F300 product MINOLTA E223 0x4017 Dimage E223 /* Mitsumi products */ product MITSUMI CDRRW 0x0000 CD-R/RW Drive product MITSUMI BT_DONGLE 0x641f Bluetooth USB dongle product MITSUMI FDD 0x6901 USB FDD /* Mobile Action products */ product MOBILEACTION MA620 0x0620 MA-620 Infrared Adapter /* Mobility products */ product MOBILITY USB_SERIAL 0x0202 FTDI compatible adapter product MOBILITY EA 0x0204 Ethernet product MOBILITY EASIDOCK 0x0304 EasiDock Ethernet /* MosChip products */ product MOSCHIP MCS7703 0x7703 MCS7703 Serial Port Adapter product MOSCHIP MCS7730 0x7730 MCS7730 Ethernet product MOSCHIP MCS7820 0x7820 MCS7820 Serial Port Adapter product MOSCHIP MCS7830 0x7830 MCS7830 Ethernet product MOSCHIP MCS7832 0x7832 MCS7832 Ethernet product MOSCHIP MCS7840 0x7840 MCS7840 Serial Port Adapter /* Motorola products */ product MOTOROLA MC141555 0x1555 MC141555 hub controller product MOTOROLA SB4100 0x4100 SB4100 USB Cable Modem product MOTOROLA2 T720C 0x2822 T720c product MOTOROLA2 A41XV32X 0x2a22 A41x/V32x Mobile Phones product MOTOROLA2 E398 0x4810 E398 Mobile Phone product MOTOROLA2 USBLAN 0x600c USBLAN product MOTOROLA2 USBLAN2 0x6027 USBLAN product MOTOROLA2 MB886 0x710f MB886 Mobile Phone (Atria HD) product MOTOROLA4 RT2770 0x9031 RT2770 product MOTOROLA4 RT3070 0x9032 RT3070 /* MpMan products */ product MPMAN MPF400_2 0x25a8 MPF400 Music Player 2Go product MPMAN MPF400_1 0x36d0 MPF400 Music Player 1Go /* MultiTech products */ product MULTITECH ATLAS 0xf101 MT5634ZBA-USB modem /* Mustek products */ product MUSTEK 1200CU 0x0001 1200 CU scanner product MUSTEK 600CU 0x0002 600 CU scanner product MUSTEK 1200USB 0x0003 1200 USB scanner product MUSTEK 1200UB 0x0006 1200 UB scanner product MUSTEK 1200USBPLUS 0x0007 1200 USB Plus scanner product MUSTEK 1200CUPLUS 0x0008 1200 CU Plus scanner product MUSTEK BEARPAW1200F 0x0010 BearPaw 1200F scanner product MUSTEK BEARPAW2400TA 0x0218 BearPaw 2400TA scanner product MUSTEK BEARPAW1200TA 0x021e BearPaw 1200TA scanner product MUSTEK 600USB 0x0873 600 USB scanner product MUSTEK MDC800 0xa800 MDC-800 digital camera /* M-Systems products */ product MSYSTEMS DISKONKEY 0x0010 DiskOnKey product MSYSTEMS DISKONKEY2 0x0011 DiskOnKey /* Myson products */ product MYSON HEDEN_8813 0x8813 USB-IDE product MYSON HEDEN 0x8818 USB-IDE product MYSON HUBREADER 0x8819 COMBO Card reader with USB HUB product MYSON STARREADER 0x9920 USB flash card adapter /* National Semiconductor */ product NATIONAL BEARPAW1200 0x1000 BearPaw 1200 product NATIONAL BEARPAW2400 0x1001 BearPaw 2400 /* NEC products */ product NEC HUB_0050 0x0050 USB 2.0 7-Port Hub product NEC HUB_005A 0x005a USB 2.0 4-Port Hub product NEC WL300NUG 0x0249 WL300NU-G product NEC HUB 0x55aa hub product NEC HUB_B 0x55ab hub /* NEODIO products */ product NEODIO ND3260 0x3260 8-in-1 Multi-format Flash Controller product NEODIO ND5010 0x5010 Multi-format Flash Controller /* Neotel products */ product NEOTEL PRIME 0x4000 Prime USB modem /* Netac products */ product NETAC CF_CARD 0x1060 USB-CF-Card product NETAC ONLYDISK 0x0003 OnlyDisk /* NetChip Technology Products */ product NETCHIP TURBOCONNECT 0x1080 Turbo-Connect product NETCHIP CLIK_40 0xa140 USB Clik! 40 product NETCHIP GADGETZERO 0xa4a0 Linux Gadget Zero product NETCHIP ETHERNETGADGET 0xa4a2 Linux Ethernet/RNDIS gadget on pxa210/25x/26x product NETCHIP POCKETBOOK 0xa4a5 PocketBook /* Netgear products */ product NETGEAR EA101 0x1001 Ethernet product NETGEAR EA101X 0x1002 Ethernet product NETGEAR FA101 0x1020 Ethernet 10/100, USB1.1 product NETGEAR FA120 0x1040 USB 2.0 Ethernet product NETGEAR M4100 0x1100 M4100/M5300/M7100 series switch product NETGEAR WG111V1_2 0x4240 PrismGT USB 2.0 WLAN product NETGEAR WG111V3 0x4260 WG111v3 product NETGEAR WG111U 0x4300 WG111U product NETGEAR WG111U_NF 0x4301 WG111U (no firmware) product NETGEAR WG111V2 0x6a00 WG111V2 product NETGEAR WN111V2 0x9001 WN111V2 product NETGEAR WNDA3100 0x9010 WNDA3100 product NETGEAR WNDA4100 0x9012 WNDA4100 product NETGEAR WNDA3200 0x9018 WNDA3200 product NETGEAR RTL8192CU 0x9021 RTL8192CU product NETGEAR WNA1000 0x9040 WNA1000 product NETGEAR WNA1000M 0x9041 WNA1000M product NETGEAR2 MA101 0x4100 MA101 product NETGEAR2 MA101B 0x4102 MA101 Rev B product NETGEAR3 WG111T 0x4250 WG111T product NETGEAR3 WG111T_NF 0x4251 WG111T (no firmware) product NETGEAR3 WPN111 0x5f00 WPN111 product NETGEAR3 WPN111_NF 0x5f01 WPN111 (no firmware) product NETGEAR3 WPN111_2 0x5f02 WPN111 product NETGEAR4 RTL8188CU 0x9041 RTL8188CU /* NetIndex products */ product NETINDEX WS002IN 0x2001 Willcom WS002IN /* NEWlink */ product NEWLINK USB2IDEBRIDGE 0x00ff USB 2.0 Hard Drive Enclosure /* Nikon products */ product NIKON E990 0x0102 Digital Camera E990 product NIKON LS40 0x4000 CoolScan LS40 ED product NIKON D300 0x041a Digital Camera D300 /* NovaTech Products */ product NOVATECH NV902 0x9020 NovaTech NV-902W product NOVATECH RT2573 0x9021 RT2573 product NOVATECH RTL8188CU 0x9071 RTL8188CU /* Nokia products */ product NOKIA N958GB 0x0070 Nokia N95 8GBc product NOKIA2 CA42 0x1234 CA-42 cable /* Novatel Wireless products */ product NOVATEL V640 0x1100 Merlin V620 product NOVATEL CDMA_MODEM 0x1110 Novatel Wireless Merlin CDMA product NOVATEL V620 0x1110 Merlin V620 product NOVATEL V740 0x1120 Merlin V740 product NOVATEL V720 0x1130 Merlin V720 product NOVATEL U740 0x1400 Merlin U740 product NOVATEL U740_2 0x1410 Merlin U740 product NOVATEL U870 0x1420 Merlin U870 product NOVATEL XU870 0x1430 Merlin XU870 product NOVATEL X950D 0x1450 Merlin X950D product NOVATEL ES620 0x2100 Expedite ES620 product NOVATEL E725 0x2120 Expedite E725 product NOVATEL ES620_2 0x2130 Expedite ES620 product NOVATEL ES620 0x2100 ES620 CDMA product NOVATEL U720 0x2110 Merlin U720 product NOVATEL EU730 0x2400 Expedite EU730 product NOVATEL EU740 0x2410 Expedite EU740 product NOVATEL EU870D 0x2420 Expedite EU870D product NOVATEL U727 0x4100 Merlin U727 CDMA product NOVATEL MC950D 0x4400 Novatel MC950D HSUPA product NOVATEL MC990D 0x7001 Novatel MC990D product NOVATEL ZEROCD 0x5010 Novatel ZeroCD product NOVATEL MIFI2200V 0x5020 Novatel MiFi 2200 CDMA Virgin Mobile product NOVATEL ZEROCD2 0x5030 Novatel ZeroCD product NOVATEL MIFI2200 0x5041 Novatel MiFi 2200 CDMA product NOVATEL U727_2 0x5100 Merlin U727 CDMA product NOVATEL U760 0x6000 Novatel U760 product NOVATEL MC760 0x6002 Novatel MC760 product NOVATEL MC547 0x7042 Novatel MC547 product NOVATEL MC679 0x7031 Novatel MC679 product NOVATEL2 FLEXPACKGPS 0x0100 NovAtel FlexPack GPS receiver /* Merlin products */ product MERLIN V620 0x1110 Merlin V620 /* O2Micro products */ product O2MICRO OZ776_HUB 0x7761 OZ776 hub product O2MICRO OZ776_CCID_SC 0x7772 OZ776 CCID SC Reader /* Olimex products */ product OLIMEX ARM_USB_OCD 0x0003 FTDI compatible adapter product OLIMEX ARM_USB_OCD_H 0x002b FTDI compatible adapter /* Olympus products */ product OLYMPUS C1 0x0102 C-1 Digital Camera product OLYMPUS C700 0x0105 C-700 Ultra Zoom /* OmniVision Technologies, Inc. products */ product OMNIVISION OV511 0x0511 OV511 Camera product OMNIVISION OV511PLUS 0xa511 OV511+ Camera /* OnSpec Electronic, Inc. */ product ONSPEC SDS_HOTFIND_D 0x0400 SDS-infrared.com Hotfind-D Infrared Camera product ONSPEC MDCFE_B_CF_READER 0xa000 MDCFE-B USB CF Reader product ONSPEC CFMS_RW 0xa001 SIIG/Datafab Memory Stick+CF Reader/Writer product ONSPEC READER 0xa003 Datafab-based Reader product ONSPEC CFSM_READER 0xa005 PNY/Datafab CF+SM Reader product ONSPEC CFSM_READER2 0xa006 Simple Tech/Datafab CF+SM Reader product ONSPEC MDSM_B_READER 0xa103 MDSM-B reader product ONSPEC CFSM_COMBO 0xa109 USB to CF + SM Combo (LC1) product ONSPEC UCF100 0xa400 FlashLink UCF-100 CompactFlash Reader product ONSPEC2 IMAGEMATE_SDDR55 0xa103 ImageMate SDDR55 /* Option products */ product OPTION VODAFONEMC3G 0x5000 Vodafone Mobile Connect 3G datacard product OPTION GT3G 0x6000 GlobeTrotter 3G datacard product OPTION GT3GQUAD 0x6300 GlobeTrotter 3G QUAD datacard product OPTION GT3GPLUS 0x6600 GlobeTrotter 3G+ datacard product OPTION GTICON322 0xd033 GlobeTrotter Icon322 storage product OPTION GTMAX36 0x6701 GlobeTrotter Max 3.6 Modem product OPTION GTMAX72 0x6711 GlobeTrotter Max 7.2 HSDPA product OPTION GTHSDPA 0x6971 GlobeTrotter HSDPA product OPTION GTMAXHSUPA 0x7001 GlobeTrotter HSUPA product OPTION GTMAXHSUPAE 0x6901 GlobeTrotter HSUPA PCIe product OPTION GTMAX380HSUPAE 0x7211 GlobeTrotter 380HSUPA PCIe product OPTION GT3G_1 0x6050 3G modem product OPTION GT3G_2 0x6100 3G modem product OPTION GT3G_3 0x6150 3G modem product OPTION GT3G_4 0x6200 3G modem product OPTION GT3G_5 0x6250 3G modem product OPTION GT3G_6 0x6350 3G modem product OPTION E6500 0x6500 3G modem product OPTION E6501 0x6501 3G modem product OPTION E6601 0x6601 3G modem product OPTION E6721 0x6721 3G modem product OPTION E6741 0x6741 3G modem product OPTION E6761 0x6761 3G modem product OPTION E6800 0x6800 3G modem product OPTION E7021 0x7021 3G modem product OPTION E7041 0x7041 3G modem product OPTION E7061 0x7061 3G modem product OPTION E7100 0x7100 3G modem product OPTION GTM380 0x7201 3G modem product OPTION GE40X 0x7601 Globetrotter HSUPA product OPTION GSICON72 0x6911 GlobeSurfer iCON product OPTION GSICONHSUPA 0x7251 Globetrotter HSUPA product OPTION ICON401 0x7401 GlobeSurfer iCON 401 product OPTION GTHSUPA 0x7011 Globetrotter HSUPA product OPTION GMT382 0x7501 Globetrotter HSUPA product OPTION GE40X_1 0x7301 Globetrotter HSUPA product OPTION GE40X_2 0x7361 Globetrotter HSUPA product OPTION GE40X_3 0x7381 Globetrotter HSUPA product OPTION GTM661W 0x9000 GTM661W product OPTION ICONEDGE 0xc031 GlobeSurfer iCON EDGE product OPTION MODHSXPA 0xd013 Globetrotter HSUPA product OPTION ICON321 0xd031 Globetrotter HSUPA product OPTION ICON505 0xd055 Globetrotter iCON 505 product OPTION ICON452 0x7901 Globetrotter iCON 452 /* Optoelectronics Co., Ltd */ product OPTO BARCODE 0x0001 Barcode Reader product OPTO OPTICONCODE 0x0009 Opticon Code Reader product OPTO BARCODE_1 0xa002 Barcode Reader product OPTO CRD7734 0xc000 USB Cradle CRD-7734-RU product OPTO CRD7734_1 0xc001 USB Cradle CRD-7734-RU /* OvisLink product */ product OVISLINK RT3072 0x3072 RT3072 /* OQO */ product OQO WIFI01 0x0002 model 01 WiFi interface product OQO BT01 0x0003 model 01 Bluetooth interface product OQO ETHER01PLUS 0x7720 model 01+ Ethernet product OQO ETHER01 0x8150 model 01 Ethernet interface /* Ours Technology Inc. */ product OTI DKU5 0x6858 DKU-5 Serial /* Owen.ru products */ product OWEN AC4 0x0004 AC4 USB-RS485 converter /* OWL producs */ product OWL CM_160 0xca05 OWL CM-160 power monitor /* Palm Computing, Inc. product */ product PALM SERIAL 0x0080 USB Serial product PALM M500 0x0001 Palm m500 product PALM M505 0x0002 Palm m505 product PALM M515 0x0003 Palm m515 product PALM I705 0x0020 Palm i705 product PALM TUNGSTEN_Z 0x0031 Palm Tungsten Z product PALM M125 0x0040 Palm m125 product PALM M130 0x0050 Palm m130 product PALM TUNGSTEN_T 0x0060 Palm Tungsten T product PALM ZIRE31 0x0061 Palm Zire 31 product PALM ZIRE 0x0070 Palm Zire /* Panasonic products */ product PANASONIC LS120CAM 0x0901 LS-120 Camera product PANASONIC KXL840AN 0x0d01 CD-R Drive KXL-840AN product PANASONIC KXLRW32AN 0x0d09 CD-R Drive KXL-RW32AN product PANASONIC KXLCB20AN 0x0d0a CD-R Drive KXL-CB20AN product PANASONIC KXLCB35AN 0x0d0e DVD-ROM & CD-R/RW product PANASONIC SDCAAE 0x1b00 MultiMediaCard product PANASONIC TYTP50P6S 0x3900 TY-TP50P6-S 50in Touch Panel /* Papouch products */ product PAPOUCH AD4USB 0x8003 FTDI compatible adapter product PAPOUCH AP485 0x0101 FTDI compatible adapter product PAPOUCH AP485_2 0x0104 FTDI compatible adapter product PAPOUCH DRAK5 0x0700 FTDI compatible adapter product PAPOUCH DRAK6 0x1000 FTDI compatible adapter product PAPOUCH GMSR 0x8005 FTDI compatible adapter product PAPOUCH GMUX 0x8004 FTDI compatible adapter product PAPOUCH IRAMP 0x0500 FTDI compatible adapter product PAPOUCH LEC 0x0300 FTDI compatible adapter product PAPOUCH MU 0x8001 FTDI compatible adapter product PAPOUCH QUIDO10X1 0x0b00 FTDI compatible adapter product PAPOUCH QUIDO2X16 0x0e00 FTDI compatible adapter product PAPOUCH QUIDO2X2 0x0a00 FTDI compatible adapter product PAPOUCH QUIDO30X3 0x0c00 FTDI compatible adapter product PAPOUCH QUIDO3X32 0x0f00 FTDI compatible adapter product PAPOUCH QUIDO4X4 0x0900 FTDI compatible adapter product PAPOUCH QUIDO60X3 0x0d00 FTDI compatible adapter product PAPOUCH QUIDO8X8 0x0800 FTDI compatible adapter product PAPOUCH SB232 0x0301 FTDI compatible adapter product PAPOUCH SB422 0x0102 FTDI compatible adapter product PAPOUCH SB422_2 0x0105 FTDI compatible adapter product PAPOUCH SB485 0x0100 FTDI compatible adapter product PAPOUCH SB485C 0x0107 FTDI compatible adapter product PAPOUCH SB485S 0x0106 FTDI compatible adapter product PAPOUCH SB485_2 0x0103 FTDI compatible adapter product PAPOUCH SIMUKEY 0x8002 FTDI compatible adapter product PAPOUCH TMU 0x0400 FTDI compatible adapter product PAPOUCH UPSUSB 0x8000 FTDI compatible adapter /* PARA Industrial products */ product PARA RT3070 0x8888 RT3070 /* Simtec Electronics products */ product SIMTEC ENTROPYKEY 0x0001 Entropy Key /* Pegatron products */ product PEGATRON RT2870 0x0002 RT2870 product PEGATRON RT3070 0x000c RT3070 product PEGATRON RT3070_2 0x000e RT3070 product PEGATRON RT3070_3 0x0010 RT3070 /* Peracom products */ product PERACOM SERIAL1 0x0001 Serial product PERACOM ENET 0x0002 Ethernet product PERACOM ENET3 0x0003 At Home Ethernet product PERACOM ENET2 0x0005 Ethernet /* Philips products */ product PHILIPS DSS350 0x0101 DSS 350 Digital Speaker System product PHILIPS DSS 0x0104 DSS XXX Digital Speaker System product PHILIPS HUB 0x0201 hub product PHILIPS PCA646VC 0x0303 PCA646VC PC Camera product PHILIPS PCVC680K 0x0308 PCVC680K Vesta Pro PC Camera product PHILIPS DSS150 0x0471 DSS 150 Digital Speaker System product PHILIPS ACE1001 0x066a AKTAKOM ACE-1001 cable product PHILIPS SPE3030CC 0x083a USB 2.0 External Disk product PHILIPS SNU5600 0x1236 SNU5600 product PHILIPS UM10016 0x1552 ISP 1581 Hi-Speed USB MPEG2 Encoder Reference Kit product PHILIPS DIVAUSB 0x1801 DIVA USB mp3 player product PHILIPS RT2870 0x200f RT2870 /* Philips Semiconductor products */ product PHILIPSSEMI HUB1122 0x1122 HUB /* Megatec */ product MEGATEC UPS 0x5161 Phoenixtec protocol based UPS /* P.I. Engineering products */ product PIENGINEERING PS2USB 0x020b PS2 to Mac USB Adapter /* Planex Communications products */ product PLANEX GW_US11H 0x14ea GW-US11H WLAN product PLANEX2 RTL8188CUS 0x1201 RTL8188CUS product PLANEX2 GW_US11S 0x3220 GW-US11S WLAN product PLANEX2 GW_US54GXS 0x5303 GW-US54GXS WLAN product PLANEX2 GW_US300 0x5304 GW-US300 product PLANEX2 RTL8188CU_1 0xab2a RTL8188CU product PLANEX2 RTL8188CU_2 0xed17 RTL8188CU product PLANEX2 RTL8188CU_3 0x4902 RTL8188CU product PLANEX2 RTL8188CU_4 0xab2e RTL8188CU product PLANEX2 RTL8192CU 0xab2b RTL8192CU product PLANEX2 GWUS54HP 0xab01 GW-US54HP product PLANEX2 GWUS300MINIS 0xab24 GW-US300MiniS product PLANEX2 RT3070 0xab25 RT3070 product PLANEX2 MZKUE150N 0xab2f MZK-UE150N product PLANEX2 GWUS54MINI2 0xab50 GW-US54Mini2 product PLANEX2 GWUS54SG 0xc002 GW-US54SG product PLANEX2 GWUS54GZL 0xc007 GW-US54GZL product PLANEX2 GWUS54GD 0xed01 GW-US54GD product PLANEX2 GWUSMM 0xed02 GW-USMM product PLANEX2 RT2870 0xed06 RT2870 product PLANEX2 GWUSMICRON 0xed14 GW-USMicroN product PLANEX2 GWUSVALUEEZ 0xed17 GW-USValue-EZ product PLANEX3 GWUS54GZ 0xab10 GW-US54GZ product PLANEX3 GU1000T 0xab11 GU-1000T product PLANEX3 GWUS54MINI 0xab13 GW-US54Mini product PLANEX2 GWUSNANO 0xab28 GW-USNano /* Plextor Corp. */ product PLEXTOR 40_12_40U 0x0011 PlexWriter 40/12/40U /* PLX products */ product PLX TESTBOARD 0x9060 test board product PLX CA42 0xac70 CA-42 /* PNY products */ product PNY ATTACHE2 0x0010 USB 2.0 Flash Drive /* PortGear products */ product PORTGEAR EA8 0x0008 Ethernet product PORTGEAR EA9 0x0009 Ethernet /* Portsmith products */ product PORTSMITH EEA 0x3003 Express Ethernet /* Posiflex products */ product POSIFLEX PP7000 0x0300 FTDI compatible adapter /* Primax products */ product PRIMAX G2X300 0x0300 G2-200 scanner product PRIMAX G2E300 0x0301 G2E-300 scanner product PRIMAX G2300 0x0302 G2-300 scanner product PRIMAX G2E3002 0x0303 G2E-300 scanner product PRIMAX 9600 0x0340 Colorado USB 9600 scanner product PRIMAX 600U 0x0341 Colorado 600u scanner product PRIMAX 6200 0x0345 Visioneer 6200 scanner product PRIMAX 19200 0x0360 Colorado USB 19200 scanner product PRIMAX 1200U 0x0361 Colorado 1200u scanner product PRIMAX G600 0x0380 G2-600 scanner product PRIMAX 636I 0x0381 ReadyScan 636i product PRIMAX G2600 0x0382 G2-600 scanner product PRIMAX G2E600 0x0383 G2E-600 scanner product PRIMAX COMFORT 0x4d01 Comfort product PRIMAX MOUSEINABOX 0x4d02 Mouse-in-a-Box product PRIMAX PCGAUMS1 0x4d04 Sony PCGA-UMS1 product PRIMAX HP_RH304AA 0x4d17 HP RH304AA mouse /* Prolific products */ product PROLIFIC PL2301 0x0000 PL2301 Host-Host interface product PROLIFIC PL2302 0x0001 PL2302 Host-Host interface product PROLIFIC MOTOROLA 0x0307 Motorola Cable product PROLIFIC RSAQ2 0x04bb PL2303 Serial (IODATA USB-RSAQ2) product PROLIFIC ALLTRONIX_GPRS 0x0609 Alltronix ACM003U00 modem product PROLIFIC ALDIGA_AL11U 0x0611 AlDiga AL-11U modem product PROLIFIC MICROMAX_610U 0x0612 Micromax 610U product PROLIFIC DCU11 0x1234 DCU-11 Phone Cable product PROLIFIC UIC_MSR206 0x206a UIC MSR206 Card Reader product PROLIFIC PL2303 0x2303 PL2303 Serial (ATEN/IOGEAR UC232A) product PROLIFIC PL2305 0x2305 Parallel printer product PROLIFIC ATAPI4 0x2307 ATAPI-4 Controller product PROLIFIC PL2501 0x2501 PL2501 Host-Host interface product PROLIFIC PL2506 0x2506 PL2506 USB to IDE Bridge product PROLIFIC HCR331 0x331a HCR331 Hybrid Card Reader product PROLIFIC PHAROS 0xaaa0 Prolific Pharos product PROLIFIC RSAQ3 0xaaa2 PL2303 Serial Adapter (IODATA USB-RSAQ3) product PROLIFIC2 PL2303 0x2303 PL2303 Serial Adapter /* Putercom products */ product PUTERCOM UPA100 0x047e USB-1284 BRIDGE /* Qcom products */ product QCOM RT2573 0x6196 RT2573 product QCOM RT2573_2 0x6229 RT2573 product QCOM RT2573_3 0x6238 RT2573 product QCOM RT2870 0x6259 RT2870 /* QI-hardware */ product QIHARDWARE JTAGSERIAL 0x0713 FTDI compatible adapter /* Qisda products */ product QISDA H21_1 0x4512 3G modem product QISDA H21_2 0x4523 3G modem product QISDA H20_1 0x4515 3G modem product QISDA H20_2 0x4519 3G modem /* Qualcomm products */ product QUALCOMM CDMA_MSM 0x6000 CDMA Technologies MSM phone product QUALCOMM NTT_L02C_MODEM 0x618f NTT DOCOMO L-02C product QUALCOMM NTT_L02C_STORAGE 0x61dd NTT DOCOMO L-02C product QUALCOMM2 MF330 0x6613 MF330 product QUALCOMM2 RWT_FCT 0x3100 RWT FCT-CDMA 2000 1xRTT modem product QUALCOMM2 CDMA_MSM 0x3196 CDMA Technologies MSM modem product QUALCOMM2 AC8700 0x6000 AC8700 product QUALCOMM2 VW110L 0x1000 Vertex Wireless 110L modem product QUALCOMM2 SIM5218 0x9000 SIM5218 product QUALCOMM2 WM620 0x9002 Neoway WM620 product QUALCOMM2 GOBI2000_QDL 0x9204 Qualcomm Gobi 2000 QDL product QUALCOMM2 GOBI2000 0x9205 Qualcomm Gobi 2000 modem product QUALCOMM2 VT80N 0x6500 Venus VT80N product QUALCOMM3 VFAST2 0x9909 Venus Fast2 modem product QUALCOMMINC CDMA_MSM 0x0001 CDMA Technologies MSM modem product QUALCOMMINC E0002 0x0002 3G modem product QUALCOMMINC E0003 0x0003 3G modem product QUALCOMMINC E0004 0x0004 3G modem product QUALCOMMINC E0005 0x0005 3G modem product QUALCOMMINC E0006 0x0006 3G modem product QUALCOMMINC E0007 0x0007 3G modem product QUALCOMMINC E0008 0x0008 3G modem product QUALCOMMINC E0009 0x0009 3G modem product QUALCOMMINC E000A 0x000a 3G modem product QUALCOMMINC E000B 0x000b 3G modem product QUALCOMMINC E000C 0x000c 3G modem product QUALCOMMINC E000D 0x000d 3G modem product QUALCOMMINC E000E 0x000e 3G modem product QUALCOMMINC E000F 0x000f 3G modem product QUALCOMMINC E0010 0x0010 3G modem product QUALCOMMINC E0011 0x0011 3G modem product QUALCOMMINC E0012 0x0012 3G modem product QUALCOMMINC E0013 0x0013 3G modem product QUALCOMMINC E0014 0x0014 3G modem product QUALCOMMINC MF628 0x0015 3G modem product QUALCOMMINC MF633R 0x0016 ZTE WCDMA modem product QUALCOMMINC E0017 0x0017 3G modem product QUALCOMMINC E0018 0x0018 3G modem product QUALCOMMINC E0019 0x0019 3G modem product QUALCOMMINC E0020 0x0020 3G modem product QUALCOMMINC E0021 0x0021 3G modem product QUALCOMMINC E0022 0x0022 3G modem product QUALCOMMINC E0023 0x0023 3G modem product QUALCOMMINC E0024 0x0024 3G modem product QUALCOMMINC E0025 0x0025 3G modem product QUALCOMMINC E0026 0x0026 3G modem product QUALCOMMINC E0027 0x0027 3G modem product QUALCOMMINC E0028 0x0028 3G modem product QUALCOMMINC E0029 0x0029 3G modem product QUALCOMMINC E0030 0x0030 3G modem product QUALCOMMINC MF626 0x0031 3G modem product QUALCOMMINC E0032 0x0032 3G modem product QUALCOMMINC E0033 0x0033 3G modem product QUALCOMMINC E0037 0x0037 3G modem product QUALCOMMINC E0039 0x0039 3G modem product QUALCOMMINC E0042 0x0042 3G modem product QUALCOMMINC E0043 0x0043 3G modem product QUALCOMMINC E0048 0x0048 3G modem product QUALCOMMINC E0049 0x0049 3G modem product QUALCOMMINC E0051 0x0051 3G modem product QUALCOMMINC E0052 0x0052 3G modem product QUALCOMMINC ZTE_STOR2 0x0053 USB ZTE Storage product QUALCOMMINC E0054 0x0054 3G modem product QUALCOMMINC E0055 0x0055 3G modem product QUALCOMMINC E0057 0x0057 3G modem product QUALCOMMINC E0058 0x0058 3G modem product QUALCOMMINC E0059 0x0059 3G modem product QUALCOMMINC E0060 0x0060 3G modem product QUALCOMMINC E0061 0x0061 3G modem product QUALCOMMINC E0062 0x0062 3G modem product QUALCOMMINC E0063 0x0063 3G modem product QUALCOMMINC E0064 0x0064 3G modem product QUALCOMMINC E0066 0x0066 3G modem product QUALCOMMINC E0069 0x0069 3G modem product QUALCOMMINC E0070 0x0070 3G modem product QUALCOMMINC E0073 0x0073 3G modem product QUALCOMMINC E0076 0x0076 3G modem product QUALCOMMINC E0078 0x0078 3G modem product QUALCOMMINC E0082 0x0082 3G modem product QUALCOMMINC E0086 0x0086 3G modem product QUALCOMMINC MF112 0x0103 3G modem product QUALCOMMINC SURFSTICK 0x0117 1&1 Surf Stick product QUALCOMMINC K3772_Z_INIT 0x1179 K3772-Z Initial product QUALCOMMINC K3772_Z 0x1181 K3772-Z product QUALCOMMINC ZTE_MF730M 0x1420 3G modem product QUALCOMMINC MF195E_INIT 0x1514 MF195E initial product QUALCOMMINC MF195E 0x1516 MF195E product QUALCOMMINC ZTE_STOR 0x2000 USB ZTE Storage product QUALCOMMINC E2002 0x2002 3G modem product QUALCOMMINC E2003 0x2003 3G modem product QUALCOMMINC AC682 0xffdd CDMA 1xEVDO USB modem product QUALCOMMINC AC682_INIT 0xffde CDMA 1xEVDO USB modem (initial) product QUALCOMMINC AC8710 0xfff1 3G modem product QUALCOMMINC AC2726 0xfff5 3G modem product QUALCOMMINC AC8700 0xfffe CDMA 1xEVDO USB modem /* Quanta products */ product QUANTA RW6815_1 0x00ce HP iPAQ rw6815 product QUANTA RT3070 0x0304 RT3070 product QUANTA Q101_STOR 0x1000 USB Q101 Storage product QUANTA Q101 0xea02 HSDPA modem product QUANTA Q111 0xea03 HSDPA modem product QUANTA GLX 0xea04 HSDPA modem product QUANTA GKE 0xea05 HSDPA modem product QUANTA GLE 0xea06 HSDPA modem product QUANTA RW6815R 0xf003 HP iPAQ rw6815 RNDIS /* Qtronix products */ product QTRONIX 980N 0x2011 Scorpion-980N keyboard /* Quickshot products */ product QUICKSHOT STRIKEPAD 0x6238 USB StrikePad /* Radio Shack */ product RADIOSHACK USBCABLE 0x4026 USB to Serial Cable /* Rainbow Technologies products */ product RAINBOW IKEY2000 0x1200 i-Key 2000 /* Ralink Technology products */ product RALINK RT2570 0x1706 RT2500USB Wireless Adapter product RALINK RT2070 0x2070 RT2070 product RALINK RT2570_2 0x2570 RT2500USB Wireless Adapter product RALINK RT2573 0x2573 RT2501USB Wireless Adapter product RALINK RT2671 0x2671 RT2601USB Wireless Adapter product RALINK RT2770 0x2770 RT2770 product RALINK RT2870 0x2870 RT2870 product RALINK RT_STOR 0x2878 USB Storage product RALINK RT3070 0x3070 RT3070 product RALINK RT3071 0x3071 RT3071 product RALINK RT3072 0x3072 RT3072 product RALINK RT3370 0x3370 RT3370 product RALINK RT3572 0x3572 RT3572 product RALINK RT3573 0x3573 RT3573 product RALINK RT5370 0x5370 RT5370 product RALINK RT5572 0x5572 RT5572 product RALINK RT8070 0x8070 RT8070 product RALINK RT2570_3 0x9020 RT2500USB Wireless Adapter product RALINK RT2573_2 0x9021 RT2501USB Wireless Adapter /* RATOC Systems products */ product RATOC REXUSB60 0xb000 USB serial adapter REX-USB60 product RATOC REXUSB60F 0xb020 USB serial adapter REX-USB60F /* Realtek products */ /* Green House and CompUSA OEM this part */ product REALTEK DUMMY 0x0000 Dummy product product REALTEK USB20CRW 0x0158 USB20CRW Card Reader product REALTEK RTL8188ETV 0x0179 RTL8188ETV product REALTEK RTL8188CTV 0x018a RTL8188CTV product REALTEK USBKR100 0x8150 USBKR100 USB Ethernet product REALTEK RTL8152 0x8152 RTL8152 USB Ethernet product REALTEK RTL8153 0x8153 RTL8153 USB Ethernet product REALTEK RTL8188CE_0 0x8170 RTL8188CE product REALTEK RTL8171 0x8171 RTL8171 product REALTEK RTL8172 0x8172 RTL8172 product REALTEK RTL8173 0x8173 RTL8173 product REALTEK RTL8174 0x8174 RTL8174 product REALTEK RTL8188CU_0 0x8176 RTL8188CU product REALTEK RTL8188EU 0x8179 RTL8188EU product REALTEK RTL8188CE_1 0x817e RTL8188CE product REALTEK RTL8188CU_1 0x817a RTL8188CU product REALTEK RTL8188CU_2 0x817b RTL8188CU product REALTEK RTL8187 0x8187 RTL8187 Wireless Adapter product REALTEK RTL8187B_0 0x8189 RTL8187B Wireless Adapter product REALTEK RTL8188CU_3 0x8191 RTL8188CU product REALTEK RTL8196EU 0x8196 RTL8196EU product REALTEK RTL8187B_1 0x8197 RTL8187B Wireless Adapter product REALTEK RTL8187B_2 0x8198 RTL8187B Wireless Adapter product REALTEK RTL8188CUS 0x818a RTL8188CUS product REALTEK RTL8188CU_COMBO 0x8754 RTL8188CU product REALTEK RTL8191CU 0x8177 RTL8191CU product REALTEK RTL8192CU 0x8178 RTL8192CU product REALTEK RTL8192CE 0x817c RTL8192CE product REALTEK RTL8188RU_1 0x817d RTL8188RU product REALTEK RTL8188RU_3 0x817f RTL8188RU product REALTEK RTL8712 0x8712 RTL8712 product REALTEK RTL8713 0x8712 RTL8713 product REALTEK RTL8188RU_2 0x317f RTL8188RU product REALTEK RTL8192SU 0xc512 RTL8192SU /* RedOctane products */ product REDOCTANE DUMMY 0x0000 Dummy product product REDOCTANE GHMIDI 0x474b GH MIDI INTERFACE /* Renesas products */ product RENESAS RX610 0x0053 RX610 RX-Stick /* Ricoh products */ product RICOH VGPVCC2 0x1830 VGP-VCC2 Camera product RICOH VGPVCC3 0x1832 VGP-VCC3 Camera product RICOH VGPVCC2_2 0x1833 VGP-VCC2 Camera product RICOH VGPVCC2_3 0x1834 VGP-VCC2 Camera product RICOH VGPVCC7 0x183a VGP-VCC7 Camera product RICOH VGPVCC8 0x183b VGP-VCC8 Camera /* Reiner-SCT products */ product REINERSCT CYBERJACK_ECOM 0x0100 e-com cyberJack /* Roland products */ product ROLAND UA100 0x0000 UA-100 Audio I/F product ROLAND UM4 0x0002 UM-4 MIDI I/F product ROLAND SC8850 0x0003 SC-8850 MIDI Synth product ROLAND U8 0x0004 U-8 Audio I/F product ROLAND UM2 0x0005 UM-2 MIDI I/F product ROLAND SC8820 0x0007 SC-8820 MIDI Synth product ROLAND PC300 0x0008 PC-300 MIDI Keyboard product ROLAND UM1 0x0009 UM-1 MIDI I/F product ROLAND SK500 0x000b SK-500 MIDI Keyboard product ROLAND SCD70 0x000c SC-D70 MIDI Synth product ROLAND UM880N 0x0014 EDIROL UM-880 MIDI I/F (native) product ROLAND UM880G 0x0015 EDIROL UM-880 MIDI I/F (generic) product ROLAND SD90 0x0016 SD-90 MIDI Synth product ROLAND UM550 0x0023 UM-550 MIDI I/F product ROLAND SD20 0x0027 SD-20 MIDI Synth product ROLAND SD80 0x0029 SD-80 MIDI Synth product ROLAND UA700 0x002b UA-700 Audio I/F /* Rockfire products */ product ROCKFIRE GAMEPAD 0x2033 gamepad 203USB /* RATOC Systems products */ product RATOC REXUSB60 0xb000 REX-USB60 product RATOC REXUSB60F 0xb020 REX-USB60F /* RT system products */ product RTSYSTEMS CT29B 0x9e54 FTDI compatible adapter product RTSYSTEMS SERIAL_VX7 0x9e52 FTDI compatible adapter /* Sagem products */ product SAGEM USBSERIAL 0x0027 USB-Serial Controller product SAGEM XG760A 0x004a XG-760A product SAGEM XG76NA 0x0062 XG-76NA /* Samsung products */ product SAMSUNG WIS09ABGN 0x2018 WIS09ABGN Wireless LAN adapter product SAMSUNG ML6060 0x3008 ML-6060 laser printer product SAMSUNG YP_U2 0x5050 YP-U2 MP3 Player product SAMSUNG YP_U4 0x5092 YP-U4 MP3 Player product SAMSUNG I500 0x6601 I500 Palm USB Phone product SAMSUNG I330 0x8001 I330 phone cradle product SAMSUNG2 RT2870_1 0x2018 RT2870 /* Samsung Techwin products */ product SAMSUNG_TECHWIN DIGIMAX_410 0x000a Digimax 410 /* SanDisk products */ product SANDISK SDDR05A 0x0001 ImageMate SDDR-05a product SANDISK SDDR31 0x0002 ImageMate SDDR-31 product SANDISK SDDR05 0x0005 ImageMate SDDR-05 product SANDISK SDDR12 0x0100 ImageMate SDDR-12 product SANDISK SDDR09 0x0200 ImageMate SDDR-09 product SANDISK SDDR75 0x0810 ImageMate SDDR-75 product SANDISK SDCZ2_128 0x7100 Cruzer Mini 128MB product SANDISK SDCZ2_256 0x7104 Cruzer Mini 256MB product SANDISK SDCZ4_128 0x7112 Cruzer Micro 128MB product SANDISK SDCZ4_256 0x7113 Cruzer Micro 256MB product SANDISK IMAGEMATE_SDDR289 0xb6ba ImageMate SDDR-289 /* Sanwa Electric Instrument Co., Ltd. products */ product SANWA KB_USB2 0x0701 KB-USB2 multimeter cable /* Sanyo Electric products */ product SANYO SCP4900 0x0701 Sanyo SCP-4900 USB Phone /* ScanLogic products */ product SCANLOGIC SL11R 0x0002 SL11R IDE Adapter product SCANLOGIC 336CX 0x0300 Phantom 336CX - C3 scanner /* Schweitzer Engineering Laboratories products */ product SEL C662 0x0001 C662 Cable /* Sealevel products */ product SEALEVEL 2101 0x2101 FTDI compatible adapter product SEALEVEL 2102 0x2102 FTDI compatible adapter product SEALEVEL 2103 0x2103 FTDI compatible adapter product SEALEVEL 2104 0x2104 FTDI compatible adapter product SEALEVEL 2106 0x9020 FTDI compatible adapter product SEALEVEL 2201_1 0x2211 FTDI compatible adapter product SEALEVEL 2201_2 0x2221 FTDI compatible adapter product SEALEVEL 2202_1 0x2212 FTDI compatible adapter product SEALEVEL 2202_2 0x2222 FTDI compatible adapter product SEALEVEL 2203_1 0x2213 FTDI compatible adapter product SEALEVEL 2203_2 0x2223 FTDI compatible adapter product SEALEVEL 2401_1 0x2411 FTDI compatible adapter product SEALEVEL 2401_2 0x2421 FTDI compatible adapter product SEALEVEL 2401_3 0x2431 FTDI compatible adapter product SEALEVEL 2401_4 0x2441 FTDI compatible adapter product SEALEVEL 2402_1 0x2412 FTDI compatible adapter product SEALEVEL 2402_2 0x2422 FTDI compatible adapter product SEALEVEL 2402_3 0x2432 FTDI compatible adapter product SEALEVEL 2402_4 0x2442 FTDI compatible adapter product SEALEVEL 2403_1 0x2413 FTDI compatible adapter product SEALEVEL 2403_2 0x2423 FTDI compatible adapter product SEALEVEL 2403_3 0x2433 FTDI compatible adapter product SEALEVEL 2403_4 0x2443 FTDI compatible adapter product SEALEVEL 2801_1 0x2811 FTDI compatible adapter product SEALEVEL 2801_2 0x2821 FTDI compatible adapter product SEALEVEL 2801_3 0x2831 FTDI compatible adapter product SEALEVEL 2801_4 0x2841 FTDI compatible adapter product SEALEVEL 2801_5 0x2851 FTDI compatible adapter product SEALEVEL 2801_6 0x2861 FTDI compatible adapter product SEALEVEL 2801_7 0x2871 FTDI compatible adapter product SEALEVEL 2801_8 0x2881 FTDI compatible adapter product SEALEVEL 2802_1 0x2812 FTDI compatible adapter product SEALEVEL 2802_2 0x2822 FTDI compatible adapter product SEALEVEL 2802_3 0x2832 FTDI compatible adapter product SEALEVEL 2802_4 0x2842 FTDI compatible adapter product SEALEVEL 2802_5 0x2852 FTDI compatible adapter product SEALEVEL 2802_6 0x2862 FTDI compatible adapter product SEALEVEL 2802_7 0x2872 FTDI compatible adapter product SEALEVEL 2802_8 0x2882 FTDI compatible adapter product SEALEVEL 2803_1 0x2813 FTDI compatible adapter product SEALEVEL 2803_2 0x2823 FTDI compatible adapter product SEALEVEL 2803_3 0x2833 FTDI compatible adapter product SEALEVEL 2803_4 0x2843 FTDI compatible adapter product SEALEVEL 2803_5 0x2853 FTDI compatible adapter product SEALEVEL 2803_6 0x2863 FTDI compatible adapter product SEALEVEL 2803_7 0x2873 FTDI compatible adapter product SEALEVEL 2803_8 0x2883 FTDI compatible adapter /* Senao products */ product SENAO RT2870_3 0x0605 RT2870 product SENAO RT2870_4 0x0615 RT2870 product SENAO NUB8301 0x2000 NUB-8301 product SENAO RT2870_1 0x9701 RT2870 product SENAO RT2870_2 0x9702 RT2870 product SENAO RT3070 0x9703 RT3070 product SENAO RT3071 0x9705 RT3071 product SENAO RT3072_1 0x9706 RT3072 product SENAO RT3072_2 0x9707 RT3072 product SENAO RT3072_3 0x9708 RT3072 product SENAO RT3072_4 0x9709 RT3072 product SENAO RT3072_5 0x9801 RT3072 product SENAO RTL8192SU_1 0x9603 RTL8192SU product SENAO RTL8192SU_2 0x9605 RTL8192SU /* ShanTou products */ product SHANTOU ST268 0x0268 ST268 product SHANTOU DM9601 0x9601 DM 9601 product SHANTOU ADM8515 0x8515 ADM8515 /* Shark products */ product SHARK PA 0x0400 Pocket Adapter /* Sharp products */ product SHARP SL5500 0x8004 Zaurus SL-5500 PDA product SHARP SLA300 0x8005 Zaurus SL-A300 PDA product SHARP SL5600 0x8006 Zaurus SL-5600 PDA product SHARP SLC700 0x8007 Zaurus SL-C700 PDA product SHARP SLC750 0x9031 Zaurus SL-C750 PDA product SHARP WZERO3ES 0x9123 W-ZERO3 ES Smartphone product SHARP WZERO3ADES 0x91ac Advanced W-ZERO3 ES Smartphone product SHARP WILLCOM03 0x9242 WILLCOM03 /* Shuttle Technology products */ product SHUTTLE EUSB 0x0001 E-USB Bridge product SHUTTLE EUSCSI 0x0002 eUSCSI Bridge product SHUTTLE SDDR09 0x0003 ImageMate SDDR09 product SHUTTLE EUSBCFSM 0x0005 eUSB SmartMedia / CompactFlash Adapter product SHUTTLE ZIOMMC 0x0006 eUSB MultiMediaCard Adapter product SHUTTLE HIFD 0x0007 Sony Hifd product SHUTTLE EUSBATAPI 0x0009 eUSB ATA/ATAPI Adapter product SHUTTLE CF 0x000a eUSB CompactFlash Adapter product SHUTTLE EUSCSI_B 0x000b eUSCSI Bridge product SHUTTLE EUSCSI_C 0x000c eUSCSI Bridge product SHUTTLE CDRW 0x0101 CD-RW Device product SHUTTLE EUSBORCA 0x0325 eUSB ORCA Quad Reader /* Siemens products */ product SIEMENS SPEEDSTREAM 0x1001 SpeedStream product SIEMENS SPEEDSTREAM22 0x1022 SpeedStream 1022 product SIEMENS2 WLL013 0x001b WLL013 product SIEMENS2 ES75 0x0034 GSM module MC35 product SIEMENS2 WL54G 0x3c06 54g USB Network Adapter product SIEMENS3 SX1 0x0001 SX1 product SIEMENS3 X65 0x0003 X65 product SIEMENS3 X75 0x0004 X75 product SIEMENS3 EF81 0x0005 EF81 /* Sierra Wireless products */ product SIERRA EM5625 0x0017 EM5625 product SIERRA MC5720_2 0x0018 MC5720 product SIERRA MC5725 0x0020 MC5725 product SIERRA AIRCARD580 0x0112 Sierra Wireless AirCard 580 product SIERRA AIRCARD595 0x0019 Sierra Wireless AirCard 595 product SIERRA AC595U 0x0120 Sierra Wireless AirCard 595U product SIERRA AC597E 0x0021 Sierra Wireless AirCard 597E product SIERRA EM5725 0x0022 EM5725 product SIERRA C597 0x0023 Sierra Wireless Compass 597 product SIERRA MC5727 0x0024 MC5727 product SIERRA T598 0x0025 T598 product SIERRA T11 0x0026 T11 product SIERRA AC402 0x0027 AC402 product SIERRA MC5728 0x0028 MC5728 product SIERRA E0029 0x0029 E0029 product SIERRA AIRCARD580 0x0112 Sierra Wireless AirCard 580 product SIERRA AC595U 0x0120 Sierra Wireless AirCard 595U product SIERRA MC5720 0x0218 MC5720 Wireless Modem product SIERRA MINI5725 0x0220 Sierra Wireless miniPCI 5275 product SIERRA MC5727_2 0x0224 MC5727 product SIERRA MC8755_2 0x6802 MC8755 product SIERRA MC8765 0x6803 MC8765 product SIERRA MC8755 0x6804 MC8755 product SIERRA MC8765_2 0x6805 MC8765 product SIERRA MC8755_4 0x6808 MC8755 product SIERRA MC8765_3 0x6809 MC8765 product SIERRA AC875U 0x6812 AC875U HSDPA USB Modem product SIERRA MC8755_3 0x6813 MC8755 HSDPA product SIERRA MC8775_2 0x6815 MC8775 product SIERRA MC8775 0x6816 MC8775 product SIERRA AC875 0x6820 Sierra Wireless AirCard 875 product SIERRA AC875U_2 0x6821 AC875U product SIERRA AC875E 0x6822 AC875E product SIERRA MC8780 0x6832 MC8780 product SIERRA MC8781 0x6833 MC8781 product SIERRA MC8780_2 0x6834 MC8780 product SIERRA MC8781_2 0x6835 MC8781 product SIERRA MC8780_3 0x6838 MC8780 product SIERRA MC8781_3 0x6839 MC8781 product SIERRA MC8785 0x683A MC8785 product SIERRA MC8785_2 0x683B MC8785 product SIERRA MC8790 0x683C MC8790 product SIERRA MC8791 0x683D MC8791 product SIERRA MC8792 0x683E MC8792 product SIERRA AC880 0x6850 Sierra Wireless AirCard 880 product SIERRA AC881 0x6851 Sierra Wireless AirCard 881 product SIERRA AC880E 0x6852 Sierra Wireless AirCard 880E product SIERRA AC881E 0x6853 Sierra Wireless AirCard 881E product SIERRA AC880U 0x6855 Sierra Wireless AirCard 880U product SIERRA AC881U 0x6856 Sierra Wireless AirCard 881U product SIERRA AC885E 0x6859 AC885E product SIERRA AC885E_2 0x685A AC885E product SIERRA AC885U 0x6880 Sierra Wireless AirCard 885U product SIERRA C888 0x6890 C888 product SIERRA C22 0x6891 C22 product SIERRA E6892 0x6892 E6892 product SIERRA E6893 0x6893 E6893 product SIERRA MC8700 0x68A3 MC8700 product SIERRA MC7354 0x68C0 MC7354 product SIERRA MC7355 0x9041 MC7355 product SIERRA AC313U 0x68aa Sierra Wireless AirCard 313U product SIERRA TRUINSTALL 0x0fff Aircard Tru Installer /* Sigmatel products */ product SIGMATEL WBT_3052 0x4200 WBT-3052 IrDA/USB Bridge product SIGMATEL I_BEAD100 0x8008 i-Bead 100 MP3 Player /* SIIG products */ /* Also: Omnidirectional Control Technology products */ product SIIG DIGIFILMREADER 0x0004 DigiFilm-Combo Reader product SIIG WINTERREADER 0x0330 WINTERREADER Reader product SIIG2 DK201 0x0103 FTDI compatible adapter product SIIG2 USBTOETHER 0x0109 USB TO Ethernet product SIIG2 US2308 0x0421 Serial /* Silicom products */ product SILICOM U2E 0x0001 U2E product SILICOM GPE 0x0002 Psion Gold Port Ethernet /* SI Labs */ product SILABS VSTABI 0x0f91 VStabi Controller product SILABS ARKHAM_DS101_M 0x1101 Arkham DS101 Monitor product SILABS ARKHAM_DS101_A 0x1601 Arkham DS101 Adapter product SILABS BSM7DUSB 0x800a SPORTident BSM7-D USB product SILABS POLOLU 0x803b Pololu Serial product SILABS CYGNAL_DEBUG 0x8044 Cygnal Debug Adapter product SILABS SB_PARAMOUNT_ME 0x8043 Software Bisque Paramount ME product SILABS SAEL 0x8053 SA-EL USB product SILABS GSM2228 0x8054 Enfora GSM2228 USB product SILABS ARGUSISP 0x8066 Argussoft ISP product SILABS IMS_USB_RS422 0x806f IMS USB-RS422 product SILABS CRUMB128 0x807a Crumb128 board product SILABS OPTRIS_MSPRO 0x80c4 Optris MSpro LT Thermometer product SILABS DEGREE 0x80ca Degree Controls Inc product SILABS TRACIENT 0x80dd Tracient RFID product SILABS TRAQMATE 0x80ed Track Systems Traqmate product SILABS SUUNTO 0x80f6 Suunto Sports Instrument product SILABS ARYGON_MIFARE 0x8115 Arygon Mifare RFID reader product SILABS BURNSIDE 0x813d Burnside Telecon Deskmobile product SILABS TAMSMASTER 0x813f Tams Master Easy Control product SILABS WMRBATT 0x814a WMR RIGblaster Plug&Play product SILABS WMRRIGBLASTER 0x814a WMR RIGblaster Plug&Play product SILABS WMRRIGTALK 0x814b WMR RIGtalk RT1 product SILABS B_G_H3000 0x8156 B&G H3000 Data Cable product SILABS HELICOM 0x815e Helicomm IP-Link 1220-DVM product SILABS HAMLINKUSB 0x815f Timewave HamLinkUSB product SILABS AVIT_USB_TTL 0x818b AVIT Research USB-TTL product SILABS MJS_TOSLINK 0x819f MJS USB-TOSLINK product SILABS WAVIT 0x81a6 ThinkOptics WavIt product SILABS MULTIPLEX_RC 0x81a9 Multiplex RC adapter product SILABS MSD_DASHHAWK 0x81ac MSD DashHawk product SILABS INSYS_MODEM 0x81ad INSYS Modem product SILABS LIPOWSKY_JTAG 0x81c8 Lipowsky Baby-JTAG product SILABS LIPOWSKY_LIN 0x81e2 Lipowsky Baby-LIN product SILABS AEROCOMM 0x81e7 Aerocomm Radio product SILABS ZEPHYR_BIO 0x81e8 Zephyr Bioharness product SILABS EMS_C1007 0x81f2 EMS C1007 HF RFID controller product SILABS LIPOWSKY_HARP 0x8218 Lipowsky HARP-1 product SILABS C2_EDGE_MODEM 0x822b Commander 2 EDGE(GSM) Modem product SILABS CYGNAL_GPS 0x826b Cygnal Fasttrax GPS product SILABS TELEGESIS_ETRX2 0x8293 Telegesis ETRX2USB product SILABS PROCYON_AVS 0x82f9 Procyon AVS product SILABS MC35PU 0x8341 MC35pu product SILABS CYGNAL 0x8382 Cygnal product SILABS AMBER_AMB2560 0x83a8 Amber Wireless AMB2560 product SILABS DEKTEK_DTAPLUS 0x83d8 DekTec DTA Plus VHF/UHF Booster product SILABS KYOCERA_GPS 0x8411 Kyocera GPS product SILABS IRZ_SG10 0x8418 IRZ SG-10 GSM/GPRS Modem product SILABS BEI_VCP 0x846e BEI USB Sensor (VCP) product SILABS BALLUFF_RFID 0x8477 Balluff RFID reader product SILABS AC_SERV_IBUS 0x85ea AC-Services IBUS Interface product SILABS AC_SERV_CIS 0x85eb AC-Services CIS-IBUS product SILABS V_PREON32 0x85f8 Virtenio Preon32 product SILABS AC_SERV_CAN 0x8664 AC-Services CAN Interface product SILABS AC_SERV_OBD 0x8665 AC-Services OBD Interface product SILABS MMB_ZIGBEE 0x88a4 MMB Networks ZigBee product SILABS INGENI_ZIGBEE 0x88a5 Planet Innovation Ingeni ZigBee product SILABS CP2102 0xea60 SILABS USB UART product SILABS CP210X_2 0xea61 CP210x Serial product SILABS CP210X_3 0xea70 CP210x Serial product SILABS CP210X_4 0xea80 CP210x Serial product SILABS INFINITY_MIC 0xea71 Infinity GPS-MIC-1 Radio Monophone product SILABS USBSCOPE50 0xf001 USBscope50 product SILABS USBWAVE12 0xf002 USBwave12 product SILABS USBPULSE100 0xf003 USBpulse100 product SILABS USBCOUNT50 0xf004 USBcount50 product SILABS2 DCU11CLONE 0xaa26 DCU-11 clone product SILABS3 GPRS_MODEM 0xea61 GPRS Modem product SILABS4 100EU_MODEM 0xea61 GPRS Modem 100EU /* Silicon Portals Inc. */ product SILICONPORTALS YAPPH_NF 0x0200 YAP Phone (no firmware) product SILICONPORTALS YAPPHONE 0x0201 YAP Phone /* Sirius Technologies products */ product SIRIUS ROADSTER 0x0001 NetComm Roadster II 56 USB /* Sitecom products */ product SITECOM LN029 0x182d USB 2.0 Ethernet product SITECOM SERIAL 0x2068 USB to serial cable (v2) product SITECOM2 WL022 0x182d WL-022 /* Sitecom Europe products */ product SITECOMEU RT2870_1 0x0017 RT2870 product SITECOMEU WL168V1 0x000d WL-168 v1 product SITECOMEU LN030 0x0021 MCS7830 product SITECOMEU WL168V4 0x0028 WL-168 v4 product SITECOMEU RT2870_2 0x002b RT2870 product SITECOMEU RT2870_3 0x002c RT2870 product SITECOMEU RT2870_4 0x002d RT2870 product SITECOMEU RT2770 0x0039 RT2770 product SITECOMEU RT3070_2 0x003b RT3070 product SITECOMEU RT3070_3 0x003c RT3070 product SITECOMEU RT3070_4 0x003d RT3070 product SITECOMEU RT3070 0x003e RT3070 product SITECOMEU WL608 0x003f WL-608 product SITECOMEU RT3071 0x0040 RT3071 product SITECOMEU RT3072_1 0x0041 RT3072 product SITECOMEU RT3072_2 0x0042 RT3072 product SITECOMEU WL353 0x0045 WL-353 product SITECOMEU RT3072_3 0x0047 RT3072 product SITECOMEU RT3072_4 0x0048 RT3072 product SITECOMEU RT3072_5 0x004a RT3072 product SITECOMEU WL349V1 0x004b WL-349 v1 product SITECOMEU RT3072_6 0x004d RT3072 product SITECOMEU RTL8188CU_1 0x0052 RTL8188CU product SITECOMEU RTL8188CU_2 0x005c RTL8188CU product SITECOMEU RTL8192CU 0x0061 RTL8192CU product SITECOMEU LN032 0x0072 LN-032 product SITECOMEU LN031 0x0056 LN-031 product SITECOMEU LN028 0x061c LN-028 product SITECOMEU WL113 0x9071 WL-113 product SITECOMEU ZD1211B 0x9075 ZD1211B product SITECOMEU WL172 0x90ac WL-172 product SITECOMEU WL113R2 0x9712 WL-113 rev 2 /* Skanhex Technology products */ product SKANHEX MD_7425 0x410a MD 7425 Camera product SKANHEX SX_520Z 0x5200 SX 520z Camera /* Smart Technologies products */ product SMART PL2303 0x2303 Serial adapter /* SmartBridges products */ product SMARTBRIDGES SMARTLINK 0x0001 SmartLink USB Ethernet product SMARTBRIDGES SMARTNIC 0x0003 smartNIC 2 PnP Ethernet /* SMC products */ product SMC 2102USB 0x0100 10Mbps Ethernet product SMC 2202USB 0x0200 10/100 Ethernet product SMC 2206USB 0x0201 EZ Connect USB Ethernet product SMC 2862WG 0xee13 EZ Connect Wireless Adapter product SMC2 2020HUB 0x2020 USB Hub product SMC2 2514HUB 0x2514 USB Hub product SMC3 2662WUSB 0xa002 2662W-AR Wireless product SMC2 LAN9500_ETH 0x9500 USB/Ethernet product SMC2 LAN9505_ETH 0x9505 USB/Ethernet product SMC2 LAN9530_ETH 0x9530 USB/Ethernet product SMC2 LAN9730_ETH 0x9730 USB/Ethernet product SMC2 LAN9500_SAL10 0x9900 USB/Ethernet product SMC2 LAN9505_SAL10 0x9901 USB/Ethernet product SMC2 LAN9500A_SAL10 0x9902 USB/Ethernet product SMC2 LAN9505A_SAL10 0x9903 USB/Ethernet product SMC2 LAN9514_SAL10 0x9904 USB/Ethernet product SMC2 LAN9500A_HAL 0x9905 USB/Ethernet product SMC2 LAN9505A_HAL 0x9906 USB/Ethernet product SMC2 LAN9500_ETH_2 0x9907 USB/Ethernet product SMC2 LAN9500A_ETH_2 0x9908 USB/Ethernet product SMC2 LAN9514_ETH_2 0x9909 USB/Ethernet product SMC2 LAN9500A_ETH 0x9e00 USB/Ethernet product SMC2 LAN9505A_ETH 0x9e01 USB/Ethernet product SMC2 LAN89530_ETH 0x9e08 USB/Ethernet product SMC2 LAN9514_ETH 0xec00 USB/Ethernet /* SOHOware products */ product SOHOWARE NUB100 0x9100 10/100 USB Ethernet product SOHOWARE NUB110 0x9110 10/100 USB Ethernet /* SOLID YEAR products */ product SOLIDYEAR KEYBOARD 0x2101 Solid Year USB keyboard /* SONY products */ product SONY DSC 0x0010 DSC cameras product SONY MS_NW_MS7 0x0025 Memorystick NW-MS7 product SONY PORTABLE_HDD_V2 0x002b Portable USB Harddrive V2 product SONY MSACUS1 0x002d Memorystick MSAC-US1 product SONY HANDYCAM 0x002e Handycam product SONY MSC 0x0032 MSC memory stick slot product SONY CLIE_35 0x0038 Sony Clie v3.5 product SONY MS_PEG_N760C 0x0058 PEG N760c Memorystick product SONY CLIE_40 0x0066 Sony Clie v4.0 product SONY MS_MSC_U03 0x0069 Memorystick MSC-U03 product SONY CLIE_40_MS 0x006d Sony Clie v4.0 Memory Stick slot product SONY CLIE_S360 0x0095 Sony Clie s360 product SONY CLIE_41_MS 0x0099 Sony Clie v4.1 Memory Stick slot product SONY CLIE_41 0x009a Sony Clie v4.1 product SONY CLIE_NX60 0x00da Sony Clie nx60 product SONY CLIE_TH55 0x0144 Sony Clie th55 product SONY CLIE_TJ37 0x0169 Sony Clie tj37 product SONY RF_RECEIVER 0x01db Sony RF mouse/kbd Receiver VGP-WRC1 product SONY QN3 0x0437 Sony QN3 CMD-Jxx phone cable /* Sony Ericsson products */ product SONYERICSSON DCU10 0x0528 DCU-10 Phone Data Cable product SONYERICSSON DATAPILOT 0x2003 Datapilot Phone Cable /* SOURCENEXT products */ product SOURCENEXT KEIKAI8 0x039f KeikaiDenwa 8 product SOURCENEXT KEIKAI8_CHG 0x012e KeikaiDenwa 8 with charger /* SparkLAN products */ product SPARKLAN RT2573 0x0004 RT2573 product SPARKLAN RT2870_1 0x0006 RT2870 product SPARKLAN RT3070 0x0010 RT3070 /* Soundgraph products */ product SOUNDGRAPH IMON_VFD 0x0044 Antec Veris Elite VFD Panel, Knob, and Remote product SOUNDGRAPH SSTONE_LC16 0xffdc Silverstone LC16 VFD Panel, Knob, and Remote /* Speed Dragon Multimedia products */ product SPEEDDRAGON MS3303H 0x110b MS3303H Serial /* Sphairon Access Systems GmbH products */ product SPHAIRON UB801R 0x0110 UB801R /* Stelera Wireless products */ product STELERA ZEROCD 0x1000 Zerocd Installer product STELERA C105 0x1002 Stelera/Bandrish C105 USB product STELERA E1003 0x1003 3G modem product STELERA E1004 0x1004 3G modem product STELERA E1005 0x1005 3G modem product STELERA E1006 0x1006 3G modem product STELERA E1007 0x1007 3G modem product STELERA E1008 0x1008 3G modem product STELERA E1009 0x1009 3G modem product STELERA E100A 0x100a 3G modem product STELERA E100B 0x100b 3G modem product STELERA E100C 0x100c 3G modem product STELERA E100D 0x100d 3G modem product STELERA E100E 0x100e 3G modem product STELERA E100F 0x100f 3G modem product STELERA E1010 0x1010 3G modem product STELERA E1011 0x1011 3G modem product STELERA E1012 0x1012 3G modem /* STMicroelectronics products */ product STMICRO BIOCPU 0x2016 Biometric Coprocessor product STMICRO COMMUNICATOR 0x7554 USB Communicator product STMICRO ST72682 0xfada USB 2.0 Flash drive controller /* STSN products */ product STSN STSN0001 0x0001 Internet Access Device /* SUN Corporation products */ product SUNTAC DS96L 0x0003 SUNTAC U-Cable type D2 product SUNTAC PS64P1 0x0005 SUNTAC U-Cable type P1 product SUNTAC VS10U 0x0009 SUNTAC Slipper U product SUNTAC IS96U 0x000a SUNTAC Ir-Trinity product SUNTAC AS64LX 0x000b SUNTAC U-Cable type A3 product SUNTAC AS144L4 0x0011 SUNTAC U-Cable type A4 /* Sun Microsystems products */ product SUN KEYBOARD_TYPE_6 0x0005 Type 6 USB keyboard product SUN KEYBOARD_TYPE_7 0x00a2 Type 7 USB keyboard /* XXX The above is a North American PC style keyboard possibly */ product SUN MOUSE 0x0100 Type 6 USB mouse product SUN KBD_HUB 0x100e Kbd Hub /* Sunplus Innovation Technology Inc. products */ product SUNPLUS USBMOUSE 0x0007 USB Optical Mouse /* Super Top products */ product SUPERTOP IDE 0x6600 USB-IDE product SUPERTOP FLASHDRIVE 0x121c extrememory Snippy /* Syntech products */ product SYNTECH CPT8001C 0x0001 CPT-8001C Barcode scanner product SYNTECH CYPHERLAB100 0x1000 CipherLab USB Barcode Scanner /* Teclast products */ product TECLAST TLC300 0x3203 USB Media Player /* Testo products */ product TESTO USB_INTERFACE 0x0001 FTDI compatible adapter /* TexTech products */ product TEXTECH DUMMY 0x0000 Dummy product product TEXTECH U2M_1 0x0101 Textech USB MIDI cable product TEXTECH U2M_2 0x1806 Textech USB MIDI cable /* The Mobility Lab products */ product TML USB_SERIAL 0x0064 FTDI compatible adapter /* Thurlby Thandar Instrument products */ product TTI QL355P 0x03e8 FTDI compatible adapter /* Supra products */ product DIAMOND2 SUPRAEXPRESS56K 0x07da Supra Express 56K modem product DIAMOND2 SUPRA2890 0x0b4a SupraMax 2890 56K Modem product DIAMOND2 RIO600USB 0x5001 Rio 600 USB product DIAMOND2 RIO800USB 0x5002 Rio 800 USB /* Surecom Technology products */ product SURECOM EP9001G2A 0x11f2 EP-9001-G rev 2A product SURECOM RT2570 0x11f3 RT2570 product SURECOM RT2573 0x31f3 RT2573 /* Sweex products */ product SWEEX ZD1211 0x1809 ZD1211 product SWEEX2 LW153 0x0153 LW153 product SWEEX2 LW154 0x0154 LW154 product SWEEX2 LW303 0x0302 LW303 product SWEEX2 LW313 0x0313 LW313 /* System TALKS, Inc. */ product SYSTEMTALKS SGCX2UL 0x1920 SGC-X2UL /* Tapwave products */ product TAPWAVE ZODIAC 0x0100 Zodiac /* Taugagreining products */ product TAUGA CAMERAMATE 0x0005 CameraMate (DPCM_USB) /* TCTMobile products */ product TCTMOBILE X060S 0x0000 X060S 3G modem product TCTMOBILE X080S 0xf000 X080S 3G modem /* TDK products */ product TDK UPA9664 0x0115 USB-PDC Adapter UPA9664 product TDK UCA1464 0x0116 USB-cdmaOne Adapter UCA1464 product TDK UHA6400 0x0117 USB-PHS Adapter UHA6400 product TDK UPA6400 0x0118 USB-PHS Adapter UPA6400 product TDK BT_DONGLE 0x0309 Bluetooth USB dongle /* TEAC products */ product TEAC FD05PUB 0x0000 FD-05PUB floppy /* Tekram Technology products */ product TEKRAM QUICKWLAN 0x1630 QuickWLAN product TEKRAM ZD1211_1 0x5630 ZD1211 product TEKRAM ZD1211_2 0x6630 ZD1211 /* Telex Communications products */ product TELEX MIC1 0x0001 Enhanced USB Microphone /* Telit products */ product TELIT UC864E 0x1003 UC864E 3G modem product TELIT UC864G 0x1004 UC864G 3G modem /* Ten X Technology, Inc. */ product TENX UAUDIO0 0xf211 USB audio headset /* Texas Intel products */ product TI UTUSB41 0x1446 UT-USB41 hub product TI TUSB2046 0x2046 TUSB2046 hub /* Thrustmaster products */ product THRUST FUSION_PAD 0xa0a3 Fusion Digital Gamepad /* TLayTech products */ product TLAYTECH TEU800 0x1682 TEU800 3G modem /* Topre Corporation products */ product TOPRE HHKB 0x0100 HHKB Professional /* Toshiba Corporation products */ product TOSHIBA POCKETPC_E740 0x0706 PocketPC e740 product TOSHIBA RT3070 0x0a07 RT3070 product TOSHIBA G450 0x0d45 G450 modem product TOSHIBA HSDPA 0x1302 G450 modem product TOSHIBA TRANSMEMORY 0x6545 USB ThumbDrive /* Trek Technology products */ product TREK THUMBDRIVE 0x1111 ThumbDrive product TREK MEMKEY 0x8888 IBM USB Memory Key product TREK THUMBDRIVE_8MB 0x9988 ThumbDrive_8MB /* TRENDnet products */ product TRENDNET RTL8192CU 0x624d RTL8192CU product TRENDNET TEW646UBH 0x646b TEW-646UBH product TRENDNET RTL8188CU 0x648b RTL8188CU /* Tripp-Lite products */ product TRIPPLITE U209 0x2008 Serial /* Trumpion products */ product TRUMPION T33520 0x1001 T33520 USB Flash Card Controller product TRUMPION C3310 0x1100 Comotron C3310 MP3 player product TRUMPION MP3 0x1200 MP3 player /* TwinMOS */ product TWINMOS G240 0xa006 G240 product TWINMOS MDIV 0x1325 Memory Disk IV /* Ubiquam products */ product UBIQUAM UALL 0x3100 CDMA 1xRTT USB Modem (U-100/105/200/300/520) /* Ultima products */ product ULTIMA 1200UBPLUS 0x4002 1200 UB Plus scanner /* UMAX products */ product UMAX ASTRA1236U 0x0002 Astra 1236U Scanner product UMAX ASTRA1220U 0x0010 Astra 1220U Scanner product UMAX ASTRA2000U 0x0030 Astra 2000U Scanner product UMAX ASTRA2100U 0x0130 Astra 2100U Scanner product UMAX ASTRA2200U 0x0230 Astra 2200U Scanner product UMAX ASTRA3400 0x0060 Astra 3400 Scanner /* U-MEDIA Communications products */ product UMEDIA TEW444UBEU 0x3006 TEW-444UB EU product UMEDIA TEW444UBEU_NF 0x3007 TEW-444UB EU (no firmware) product UMEDIA TEW429UB_A 0x300a TEW-429UB_A product UMEDIA TEW429UB 0x300b TEW-429UB product UMEDIA TEW429UBC1 0x300d TEW-429UB C1 product UMEDIA RT2870_1 0x300e RT2870 product UMEDIA ALL0298V2 0x3204 ALL0298 v2 product UMEDIA AR5523_2 0x3205 AR5523 product UMEDIA AR5523_2_NF 0x3206 AR5523 (no firmware) /* Universal Access products */ product UNIACCESS PANACHE 0x0101 Panache Surf USB ISDN Adapter /* Unknown products */ product UNKNOWN4 NF_RIC 0x0001 FTDI compatible adapter /* USI products */ product USI MC60 0x10c5 MC60 Serial /* U.S. Robotics products */ product USR USR5422 0x0118 USR5422 WLAN product USR USR5423 0x0121 USR5423 WLAN /* VIA Technologies products */ product VIA USB2IDEBRIDGE 0x6204 USB 2.0 IDE Bridge /* VIA Labs */ product VIALABS USB30SATABRIDGE 0x0700 USB 3.0 SATA Bridge /* Vaisala products */ product VAISALA CABLE 0x0200 USB Interface cable /* Vertex products */ product VERTEX VW110L 0x0100 Vertex VW110L modem /* VidzMedia products */ product VIDZMEDIA MONSTERTV 0x4fb1 MonsterTV P2H /* Vision products */ product VISION VC6452V002 0x0002 CPiA Camera /* Visioneer products */ product VISIONEER 7600 0x0211 OneTouch 7600 product VISIONEER 5300 0x0221 OneTouch 5300 product VISIONEER 3000 0x0224 Scanport 3000 product VISIONEER 6100 0x0231 OneTouch 6100 product VISIONEER 6200 0x0311 OneTouch 6200 product VISIONEER 8100 0x0321 OneTouch 8100 product VISIONEER 8600 0x0331 OneTouch 8600 /* Vivitar products */ product VIVITAR 35XX 0x0003 Vivicam 35Xx /* VTech products */ product VTECH RT2570 0x3012 RT2570 product VTECH ZD1211B 0x3014 ZD1211B /* Wacom products */ product WACOM CT0405U 0x0000 CT-0405-U Tablet product WACOM GRAPHIRE 0x0010 Graphire product WACOM GRAPHIRE3_4X5 0x0013 Graphire 3 4x5 product WACOM INTUOSA5 0x0021 Intuos A5 product WACOM GD0912U 0x0022 Intuos 9x12 Graphics Tablet /* WAGO Kontakttechnik GmbH products */ product WAGO SERVICECABLE 0x07a6 USB Service Cable 750-923 /* WaveSense products */ product WAVESENSE JAZZ 0xaaaa Jazz blood glucose meter /* WCH products */ product WCH CH341SER 0x5523 CH341/CH340 USB-Serial Bridge product WCH2 DUMMY 0x0000 Dummy product product WCH2 CH341SER_2 0x5523 CH341/CH340 USB-Serial Bridge product WCH2 CH341SER 0x7523 CH341/CH340 USB-Serial Bridge product WCH2 U2M 0X752d CH345 USB2.0-MIDI /* West Mountain Radio products */ product WESTMOUNTAIN RIGBLASTER_ADVANTAGE 0x0003 RIGblaster Advantage /* Western Digital products */ product WESTERN COMBO 0x0200 Firewire USB Combo product WESTERN EXTHDD 0x0400 External HDD product WESTERN HUB 0x0500 USB HUB product WESTERN MYBOOK 0x0901 MyBook External HDD product WESTERN MYPASSPORT_00 0x0704 MyPassport External HDD product WESTERN MYPASSPORT_11 0x0741 MyPassport External HDD product WESTERN MYPASSPORT_01 0x0746 MyPassport External HDD product WESTERN MYPASSPORT_02 0x0748 MyPassport External HDD product WESTERN MYPASSPORT_03 0x074A MyPassport External HDD product WESTERN MYPASSPORT_04 0x074C MyPassport External HDD product WESTERN MYPASSPORT_05 0x074E MyPassport External HDD product WESTERN MYPASSPORT_06 0x07A6 MyPassport External HDD product WESTERN MYPASSPORT_07 0x07A8 MyPassport External HDD product WESTERN MYPASSPORT_08 0x07AA MyPassport External HDD product WESTERN MYPASSPORT_09 0x07AC MyPassport External HDD product WESTERN MYPASSPORT_10 0x07AE MyPassport External HDD product WESTERN MYPASSPORTES_00 0x070A MyPassport Essential External HDD product WESTERN MYPASSPORTES_01 0x071A MyPassport Essential External HDD product WESTERN MYPASSPORTES_02 0x0730 MyPassport Essential External HDD product WESTERN MYPASSPORTES_03 0x0732 MyPassport Essential External HDD product WESTERN MYPASSPORTES_04 0x0740 MyPassport Essential External HDD product WESTERN MYPASSPORTES_05 0x0742 MyPassport Essential External HDD product WESTERN MYPASSPORTES_06 0x0750 MyPassport Essential External HDD product WESTERN MYPASSPORTES_07 0x0752 MyPassport Essential External HDD product WESTERN MYPASSPORTES_08 0x07A0 MyPassport Essential External HDD product WESTERN MYPASSPORTES_09 0x07A2 MyPassport Essential External HDD /* WeTelecom products */ product WETELECOM WM_D200 0x6801 WM-D200 /* WIENER Plein & Baus GmbH products */ product WIENERPLEINBAUS PL512 0x0010 PL512 PSU product WIENERPLEINBAUS RCM 0x0011 RCM Remote Control product WIENERPLEINBAUS MPOD 0x0012 MPOD PSU product WIENERPLEINBAUS CML 0x0015 CML Data Logger /* Windbond Electronics */ product WINBOND UH104 0x5518 4-port USB Hub /* WinMaxGroup products */ product WINMAXGROUP FLASH64MC 0x6660 USB Flash Disk 64M-C /* Wistron NeWeb products */ product WISTRONNEWEB WNC0600 0x0326 WNC-0600USB product WISTRONNEWEB UR045G 0x0427 PrismGT USB 2.0 WLAN product WISTRONNEWEB UR055G 0x0711 UR055G product WISTRONNEWEB O8494 0x0804 ORiNOCO 802.11n product WISTRONNEWEB AR5523_1 0x0826 AR5523 product WISTRONNEWEB AR5523_1_NF 0x0827 AR5523 (no firmware) product WISTRONNEWEB AR5523_2 0x082a AR5523 product WISTRONNEWEB AR5523_2_NF 0x0829 AR5523 (no firmware) /* Xerox products */ product XEROX WCM15 0xffef WorkCenter M15 /* Xirlink products */ product XIRLINK PCCAM 0x8080 IBM PC Camera /* Xyratex products */ product XYRATEX PRISM_GT_1 0x2000 PrismGT USB 2.0 WLAN product XYRATEX PRISM_GT_2 0x2002 PrismGT USB 2.0 WLAN /* Yamaha products */ product YAMAHA UX256 0x1000 UX256 MIDI I/F product YAMAHA UX96 0x1008 UX96 MIDI I/F product YAMAHA RPU200 0x3104 RP-U200 product YAMAHA RTA54I 0x4000 NetVolante RTA54i Broadband&ISDN Router product YAMAHA RTW65B 0x4001 NetVolante RTW65b Broadband Wireless Router product YAMAHA RTW65I 0x4002 NetVolante RTW65i Broadband&ISDN Wireless Router product YAMAHA RTA55I 0x4004 NetVolante RTA55i Broadband VoIP Router /* Yano products */ product YANO U640MO 0x0101 U640MO-03 product YANO FW800HD 0x05fc METALWEAR-HDD /* Y.C. Cable products */ product YCCABLE PL2303 0x0fba PL2303 Serial /* Y-E Data products */ product YEDATA FLASHBUSTERU 0x0000 Flashbuster-U /* Yiso Wireless Co. products */ product YISO C893 0xc893 CDMA 2000 1xEVDO PC Card /* Z-Com products */ product ZCOM M4Y750 0x0001 M4Y-750 product ZCOM XI725 0x0002 XI-725/726 product ZCOM XI735 0x0005 XI-735 product ZCOM XG703A 0x0008 PrismGT USB 2.0 WLAN product ZCOM ZD1211 0x0011 ZD1211 product ZCOM AR5523 0x0012 AR5523 product ZCOM AR5523_NF 0x0013 AR5523 driver (no firmware) product ZCOM XM142 0x0015 XM-142 product ZCOM ZD1211B 0x001a ZD1211B product ZCOM RT2870_1 0x0022 RT2870 product ZCOM UB81 0x0023 UB81 product ZCOM RT2870_2 0x0025 RT2870 product ZCOM UB82 0x0026 UB82 /* Zinwell products */ product ZINWELL RT2570 0x0260 RT2570 product ZINWELL RT2870_1 0x0280 RT2870 product ZINWELL RT2870_2 0x0282 RT2870 product ZINWELL RT3072_1 0x0283 RT3072 product ZINWELL RT3072_2 0x0284 RT3072 product ZINWELL RT3070 0x5257 RT3070 /* Zoom Telephonics, Inc. products */ product ZOOM 2986L 0x9700 2986L Fax modem /* Zoran Microelectronics products */ product ZORAN EX20DSC 0x4343 Digital Camera EX-20 DSC /* Zydas Technology Corporation products */ product ZYDAS ZD1211 0x1211 ZD1211 WLAN abg product ZYDAS ZD1211B 0x1215 ZD1211B product ZYDAS ZD1221 0x1221 ZD1221 /* ZyXEL Communication Co. products */ product ZYXEL OMNI56K 0x1500 Omni 56K Plus product ZYXEL 980N 0x2011 Scorpion-980N keyboard product ZYXEL ZYAIRG220 0x3401 ZyAIR G-220 product ZYXEL G200V2 0x3407 G-200 v2 product ZYXEL AG225H 0x3409 AG-225H product ZYXEL M202 0x340a M-202 product ZYXEL G220V2 0x340f G-220 v2 product ZYXEL G202 0x3410 G-202 product ZYXEL RT2870_1 0x3416 RT2870 product ZYXEL NWD271N 0x3417 NWD-271N product ZYXEL NWD211AN 0x3418 NWD-211AN product ZYXEL RT2870_2 0x341a RT2870 product ZYXEL RT3070 0x341e NWD2105 product ZYXEL RTL8192CU 0x341f RTL8192CU product ZYXEL NWD2705 0x3421 NWD2705 Index: projects/vnet/sys/kern/init_main.c =================================================================== --- projects/vnet/sys/kern/init_main.c (revision 302276) +++ projects/vnet/sys/kern/init_main.c (revision 302277) @@ -1,884 +1,884 @@ /*- * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_init_path.h" #include "opt_verbose_sysinit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void mi_startup(void); /* Should be elsewhere */ /* Components of the first process -- never freed. */ static struct session session0; static struct pgrp pgrp0; struct proc proc0; struct thread0_storage thread0_st __aligned(16); struct vmspace vmspace0; struct proc *initproc; #ifndef BOOTHOWTO #define BOOTHOWTO 0 #endif int boothowto = BOOTHOWTO; /* initialized so that it can be patched */ SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "Boot control flags, passed from loader"); #ifndef BOOTVERBOSE #define BOOTVERBOSE 0 #endif int bootverbose = BOOTVERBOSE; SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "Control the output of verbose kernel messages"); #ifdef INVARIANTS FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance"); #endif /* * This ensures that there is at least one entry so that the sysinit_set * symbol is not undefined. A sybsystem ID of SI_SUB_DUMMY is never * executed. */ SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL); /* * The sysinit table itself. Items are checked off as the are run. * If we want to register new sysinit types, add them to newsysinit. */ SET_DECLARE(sysinit_set, struct sysinit); struct sysinit **sysinit, **sysinit_end; struct sysinit **newsysinit, **newsysinit_end; /* * Merge a new sysinit set into the current set, reallocating it if * necessary. This can only be called after malloc is running. */ void sysinit_add(struct sysinit **set, struct sysinit **set_end) { struct sysinit **newset; struct sysinit **sipp; struct sysinit **xipp; int count; count = set_end - set; if (newsysinit) count += newsysinit_end - newsysinit; else count += sysinit_end - sysinit; newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT); if (newset == NULL) panic("cannot malloc for sysinit"); xipp = newset; if (newsysinit) for (sipp = newsysinit; sipp < newsysinit_end; sipp++) *xipp++ = *sipp; else for (sipp = sysinit; sipp < sysinit_end; sipp++) *xipp++ = *sipp; for (sipp = set; sipp < set_end; sipp++) *xipp++ = *sipp; if (newsysinit) free(newsysinit, M_TEMP); newsysinit = newset; newsysinit_end = newset + count; } #if defined (DDB) && defined(VERBOSE_SYSINIT) static const char * symbol_name(vm_offset_t va, db_strategy_t strategy) { const char *name; c_db_sym_t sym; db_expr_t offset; if (va == 0) return (NULL); sym = db_search_symbol(va, strategy, &offset); if (offset != 0) return (NULL); db_symbol_values(sym, &name, NULL); return (name); } #endif /* * System startup; initialize the world, create process 0, mount root * filesystem, and fork to create init and pagedaemon. Most of the * hard work is done in the lower-level initialization routines including * startup(), which does memory initialization and autoconfiguration. * * This allows simple addition of new kernel subsystems that require * boot time initialization. It also allows substitution of subsystem * (for instance, a scheduler, kernel profiler, or VM system) by object * module. Finally, it allows for optional "kernel threads". */ void mi_startup(void) { register struct sysinit **sipp; /* system initialization*/ register struct sysinit **xipp; /* interior loop of sort*/ register struct sysinit *save; /* bubble*/ #if defined(VERBOSE_SYSINIT) int last; int verbose; #endif if (boothowto & RB_VERBOSE) bootverbose++; if (sysinit == NULL) { sysinit = SET_BEGIN(sysinit_set); sysinit_end = SET_LIMIT(sysinit_set); } restart: /* * Perform a bubble sort of the system initialization objects by * their subsystem (primary key) and order (secondary key). */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { for (xipp = sipp + 1; xipp < sysinit_end; xipp++) { if ((*sipp)->subsystem < (*xipp)->subsystem || ((*sipp)->subsystem == (*xipp)->subsystem && (*sipp)->order <= (*xipp)->order)) continue; /* skip*/ save = *sipp; *sipp = *xipp; *xipp = save; } } #if defined(VERBOSE_SYSINIT) last = SI_SUB_COPYRIGHT; verbose = 0; #if !defined(DDB) printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); #endif #endif /* * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { if ((*sipp)->subsystem == SI_SUB_DUMMY) continue; /* skip dummy task(s)*/ if ((*sipp)->subsystem == SI_SUB_DONE) continue; #if defined(VERBOSE_SYSINIT) if ((*sipp)->subsystem > last) { verbose = 1; last = (*sipp)->subsystem; printf("subsystem %x\n", last); } if (verbose) { #if defined(DDB) const char *func, *data; func = symbol_name((vm_offset_t)(*sipp)->func, DB_STGY_PROC); data = symbol_name((vm_offset_t)(*sipp)->udata, DB_STGY_ANY); if (func != NULL && data != NULL) printf(" %s(&%s)... ", func, data); else if (func != NULL) printf(" %s(%p)... ", func, (*sipp)->udata); else #endif printf(" %p(%p)... ", (*sipp)->func, (*sipp)->udata); } #endif /* Call function */ (*((*sipp)->func))((*sipp)->udata); #if defined(VERBOSE_SYSINIT) if (verbose) printf("done.\n"); #endif /* Check off the one we're just done */ (*sipp)->subsystem = SI_SUB_DONE; /* Check if we've installed more sysinit items via KLD */ if (newsysinit != NULL) { if (sysinit != SET_BEGIN(sysinit_set)) free(sysinit, M_TEMP); sysinit = newsysinit; sysinit_end = newsysinit_end; newsysinit = NULL; newsysinit_end = NULL; goto restart; } } mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); mtx_unlock(&Giant); /* * Now hand over this thread to swapper. */ swapper(); /* NOTREACHED*/ } /* *************************************************************************** **** **** The following SYSINIT's belong elsewhere, but have not yet **** been moved. **** *************************************************************************** */ static void print_caddr_t(void *data) { printf("%s", (char *)data); } static void print_version(void *data __unused) { int len; /* Strip a trailing newline from version. */ len = strlen(version); while (len > 0 && version[len - 1] == '\n') len--; printf("%.*s %s\n", len, version, machine); printf("%s\n", compiler_version); } SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright); SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, trademark); SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL); #ifdef WITNESS static char wit_warn[] = "WARNING: WITNESS option enabled, expect reduced performance.\n"; SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, print_caddr_t, wit_warn); SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_caddr_t, wit_warn); #endif #ifdef DIAGNOSTIC static char diag_warn[] = "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2, print_caddr_t, diag_warn); SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 2, print_caddr_t, diag_warn); #endif static int null_fetch_syscall_args(struct thread *td __unused, struct syscall_args *sa __unused) { panic("null_fetch_syscall_args"); } static void null_set_syscall_retval(struct thread *td __unused, int error __unused) { panic("null_set_syscall_retval"); } struct sysentvec null_sysvec = { .sv_size = 0, .sv_table = NULL, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = NULL, .sv_sendsig = NULL, .sv_sigcode = NULL, .sv_szsigcode = NULL, .sv_name = "null", .sv_coredump = NULL, .sv_imgact_try = NULL, .sv_minsigstksz = 0, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = NULL, .sv_setregs = NULL, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = 0, .sv_set_syscall_retval = null_set_syscall_retval, .sv_fetch_syscall_args = null_fetch_syscall_args, .sv_syscallnames = NULL, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; /* *************************************************************************** **** **** The two following SYSINIT's are proc0 specific glue code. I am not **** convinced that they can not be safely combined, but their order of **** operation has been maintained as the same as the original init_main.c **** for right now. **** **** These probably belong in init_proc.c or kern_proc.c, since they **** deal with proc0 (the fork template process). **** *************************************************************************** */ /* ARGSUSED*/ static void proc0_init(void *dummy __unused) { struct proc *p; struct thread *td; struct ucred *newcred; vm_paddr_t pageablemem; int i; GIANT_REQUIRED; p = &proc0; td = &thread0; /* * Initialize magic number and osrel. */ p->p_magic = P_MAGIC; p->p_osrel = osreldate; /* * Initialize thread and process structures. */ procinit(); /* set up proc zone */ threadinit(); /* set up UMA zones */ /* * Initialise scheduler resources. * Add scheduler specific parts to proc, thread as needed. */ schedinit(); /* scheduler gets its house in order */ /* * Create process 0 (the swapper). */ LIST_INSERT_HEAD(&allproc, p, p_list); LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); p->p_pgrp = &pgrp0; LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); LIST_INIT(&pgrp0.pg_members); LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); pgrp0.pg_session = &session0; mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); refcount_init(&session0.s_count, 1); session0.s_leader = p; p->p_sysent = &null_sysvec; p->p_flag = P_SYSTEM | P_INMEM | P_KPROC; p->p_flag2 = 0; p->p_state = PRS_NORMAL; - knlist_init_mtx(&p->p_klist, &p->p_mtx); + p->p_klist = knlist_alloc(&p->p_mtx); STAILQ_INIT(&p->p_ktr); p->p_nice = NZERO; /* pid_max cannot be greater than PID_MAX */ td->td_tid = PID_MAX + 1; LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); td->td_state = TDS_RUNNING; td->td_pri_class = PRI_TIMESHARE; td->td_user_pri = PUSER; td->td_base_user_pri = PUSER; td->td_lend_user_pri = PRI_MAX; td->td_priority = PVM; td->td_base_pri = PVM; td->td_oncpu = 0; td->td_flags = TDF_INMEM; td->td_pflags = TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); vm_domain_policy_init(&td->td_vm_dom_policy); vm_domain_policy_set(&td->td_vm_dom_policy, VM_POLICY_NONE, -1); vm_domain_policy_init(&p->p_vm_dom_policy); vm_domain_policy_set(&p->p_vm_dom_policy, VM_POLICY_NONE, -1); prison0_init(); p->p_peers = 0; p->p_leader = p; p->p_reaper = p; LIST_INIT(&p->p_reaplist); strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); strncpy(td->td_name, "swapper", sizeof (td->td_name)); callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); callout_init(&td->td_slpcallout, 1); /* Create credentials. */ newcred = crget(); newcred->cr_ngroups = 1; /* group 0 */ newcred->cr_uidinfo = uifind(0); newcred->cr_ruidinfo = uifind(0); newcred->cr_prison = &prison0; newcred->cr_loginclass = loginclass_find("default"); proc_set_cred_init(p, newcred); #ifdef AUDIT audit_cred_kproc0(newcred); #endif #ifdef MAC mac_cred_create_swapper(newcred); #endif /* Create sigacts. */ p->p_sigacts = sigacts_alloc(); /* Initialize signal state for process 0. */ siginit(&proc0); /* Create the file descriptor table. */ p->p_fd = fdinit(NULL, false); p->p_fdtol = NULL; /* Create the limits structures. */ p->p_limit = lim_alloc(); for (i = 0; i < RLIM_NLIMITS; i++) p->p_limit->pl_rlimit[i].rlim_cur = p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; /* Cast to avoid overflow on i386/PAE. */ pageablemem = ptoa((vm_paddr_t)vm_cnt.v_free_count); p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; p->p_cpulimit = RLIM_INFINITY; PROC_LOCK(p); thread_cow_get_proc(td, p); PROC_UNLOCK(p); /* Initialize resource accounting structures. */ racct_create(&p->p_racct); p->p_stats = pstats_alloc(); /* Allocate a prototype map so we have something to fork. */ p->p_vmspace = &vmspace0; vmspace0.vm_refcnt = 1; pmap_pinit0(vmspace_pmap(&vmspace0)); /* * proc0 is not expected to enter usermode, so there is no special * handling for sv_minuser here, like is done for exec_new_vmspace(). */ vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); /* * Call the init and ctor for the new thread and proc. We wait * to do this until all other structures are fairly sane. */ EVENTHANDLER_INVOKE(process_init, p); EVENTHANDLER_INVOKE(thread_init, td); EVENTHANDLER_INVOKE(process_ctor, p); EVENTHANDLER_INVOKE(thread_ctor, td); /* * Charge root for one process. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); PROC_LOCK(p); racct_add_force(p, RACCT_NPROC, 1); PROC_UNLOCK(p); } SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); /* ARGSUSED*/ static void proc0_post(void *dummy __unused) { struct timespec ts; struct proc *p; struct rusage ru; struct thread *td; /* * Now we can look at the time, having had a chance to verify the * time from the filesystem. Pretend that proc0 started now. */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { microuptime(&p->p_stats->p_start); PROC_STATLOCK(p); rufetch(p, &ru); /* Clears thread stats */ PROC_STATUNLOCK(p); p->p_rux.rux_runtime = 0; p->p_rux.rux_uticks = 0; p->p_rux.rux_sticks = 0; p->p_rux.rux_iticks = 0; FOREACH_THREAD_IN_PROC(p, td) { td->td_runtime = 0; } } sx_sunlock(&allproc_lock); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); /* * Give the ``random'' number generator a thump. */ nanotime(&ts); srandom(ts.tv_sec ^ ts.tv_nsec); } SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); static void random_init(void *dummy __unused) { /* * After CPU has been started we have some randomness on most * platforms via get_cyclecount(). For platforms that don't * we will reseed random(9) in proc0_post() as well. */ srandom(get_cyclecount()); } SYSINIT(random, SI_SUB_RANDOM, SI_ORDER_FIRST, random_init, NULL); /* *************************************************************************** **** **** The following SYSINIT's and glue code should be moved to the **** respective files on a per subsystem basis. **** *************************************************************************** */ /* *************************************************************************** **** **** The following code probably belongs in another file, like **** kern/init_init.c. **** *************************************************************************** */ /* * List of paths to try when searching for "init". */ static char init_path[MAXPATHLEN] = #ifdef INIT_PATH __XSTRING(INIT_PATH); #else "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init"; #endif SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, "Path used to search the init process"); /* * Shutdown timeout of init(8). * Unused within kernel, but used to control init(8), hence do not remove. */ #ifndef INIT_SHUTDOWN_TIMEOUT #define INIT_SHUTDOWN_TIMEOUT 120 #endif static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). " "Unused within kernel, but used to control init(8)"); /* * Start the initial user process; try exec'ing each pathname in init_path. * The program is invoked with one argument containing the boot flags. */ static void start_init(void *dummy) { vm_offset_t addr; struct execve_args args; int options, error; char *var, *path, *next, *s; char *ucp, **uap, *arg0, *arg1; struct thread *td; struct proc *p; mtx_lock(&Giant); GIANT_REQUIRED; td = curthread; p = td->td_proc; vfs_mountroot(); /* Wipe GELI passphrase from the environment. */ kern_unsetenv("kern.geom.eli.passphrase"); /* * Need just enough stack to hold the faked-up "execve()" arguments. */ addr = p->p_sysent->sv_usrstack - PAGE_SIZE; if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, 0, VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0) panic("init: couldn't allocate argument space"); p->p_vmspace->vm_maxsaddr = (caddr_t)addr; p->p_vmspace->vm_ssize = 1; if ((var = kern_getenv("init_path")) != NULL) { strlcpy(init_path, var, sizeof(init_path)); freeenv(var); } for (path = init_path; *path != '\0'; path = next) { while (*path == ':') path++; if (*path == '\0') break; for (next = path; *next != '\0' && *next != ':'; next++) /* nothing */ ; if (bootverbose) printf("start_init: trying %.*s\n", (int)(next - path), path); /* * Move out the boot flag argument. */ options = 0; ucp = (char *)p->p_sysent->sv_usrstack; (void)subyte(--ucp, 0); /* trailing zero */ if (boothowto & RB_SINGLE) { (void)subyte(--ucp, 's'); options = 1; } #ifdef notyet if (boothowto & RB_FASTBOOT) { (void)subyte(--ucp, 'f'); options = 1; } #endif #ifdef BOOTCDROM (void)subyte(--ucp, 'C'); options = 1; #endif if (options == 0) (void)subyte(--ucp, '-'); (void)subyte(--ucp, '-'); /* leading hyphen */ arg1 = ucp; /* * Move out the file name (also arg 0). */ (void)subyte(--ucp, 0); for (s = next - 1; s >= path; s--) (void)subyte(--ucp, *s); arg0 = ucp; /* * Move out the arg pointers. */ uap = (char **)rounddown2((intptr_t)ucp, sizeof(intptr_t)); (void)suword((caddr_t)--uap, (long)0); /* terminator */ (void)suword((caddr_t)--uap, (long)(intptr_t)arg1); (void)suword((caddr_t)--uap, (long)(intptr_t)arg0); /* * Point at the arguments. */ args.fname = arg0; args.argv = uap; args.envv = NULL; /* * Now try to exec the program. If can't for any reason * other than it doesn't exist, complain. * * Otherwise, return via fork_trampoline() all the way * to user mode as init! */ if ((error = sys_execve(td, &args)) == 0) { mtx_unlock(&Giant); return; } if (error != ENOENT) printf("exec %.*s: error %d\n", (int)(next - path), path, error); } printf("init: not found in path %s\n", init_path); panic("no init"); } /* * Like kproc_create(), but runs in it's own address space. * We do this early to reserve pid 1. * * Note special case - do not make it runnable yet. Other work * in progress will change this more. */ static void create_init(const void *udata __unused) { struct fork_req fr; struct ucred *newcred, *oldcred; struct thread *td; int error; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFSTOPPED; fr.fr_procp = &initproc; error = fork1(&thread0, &fr); if (error) panic("cannot fork init: %d\n", error); KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); /* divorce init's credentials from the kernel's */ newcred = crget(); sx_xlock(&proctree_lock); PROC_LOCK(initproc); initproc->p_flag |= P_SYSTEM | P_INMEM; initproc->p_treeflag |= P_TREE_REAPER; LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling); oldcred = initproc->p_ucred; crcopy(newcred, oldcred); #ifdef MAC mac_cred_create_init(newcred); #endif #ifdef AUDIT audit_cred_proc1(newcred); #endif proc_set_cred(initproc, newcred); td = FIRST_THREAD_IN_PROC(initproc); crfree(td->td_ucred); td->td_ucred = crhold(initproc->p_ucred); PROC_UNLOCK(initproc); sx_xunlock(&proctree_lock); crfree(oldcred); cpu_fork_kthread_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); } SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL); /* * Make it runnable now. */ static void kick_init(const void *udata __unused) { struct thread *td; td = FIRST_THREAD_IN_PROC(initproc); thread_lock(td); TD_SET_CAN_RUN(td); sched_add(td, SRQ_BORING); thread_unlock(td); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL); Index: projects/vnet/sys/kern/kern_event.c =================================================================== --- projects/vnet/sys/kern/kern_event.c (revision 302276) +++ projects/vnet/sys/kern/kern_event.c (revision 302277) @@ -1,2483 +1,2509 @@ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon * Copyright 2004 John-Mark Gurney * Copyright (c) 2009 Apple, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_kqueue.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include static MALLOC_DEFINE(M_KQUEUE, "kqueue", "memory for kqueue system"); /* * This lock is used if multiple kq locks are required. This possibly * should be made into a per proc lock. */ static struct mtx kq_global; MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF); #define KQ_GLOBAL_LOCK(lck, haslck) do { \ if (!haslck) \ mtx_lock(lck); \ haslck = 1; \ } while (0) #define KQ_GLOBAL_UNLOCK(lck, haslck) do { \ if (haslck) \ mtx_unlock(lck); \ haslck = 0; \ } while (0) TASKQUEUE_DEFINE_THREAD(kqueue_ctx); static int kevent_copyout(void *arg, struct kevent *kevp, int count); static int kevent_copyin(void *arg, struct kevent *kevp, int count); static int kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok); static int kqueue_acquire(struct file *fp, struct kqueue **kqp); static void kqueue_release(struct kqueue *kq, int locked); static void kqueue_destroy(struct kqueue *kq); static void kqueue_drain(struct kqueue *kq, struct thread *td); static int kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, int waitok); static void kqueue_task(void *arg, int pending); static int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, const struct timespec *timeout, struct kevent *keva, struct thread *td); static void kqueue_wakeup(struct kqueue *kq); static struct filterops *kqueue_fo_find(int filt); static void kqueue_fo_release(int filt); static fo_ioctl_t kqueue_ioctl; static fo_poll_t kqueue_poll; static fo_kqfilter_t kqueue_kqfilter; static fo_stat_t kqueue_stat; static fo_close_t kqueue_close; static fo_fill_kinfo_t kqueue_fill_kinfo; static struct fileops kqueueops = { .fo_read = invfo_rdwr, .fo_write = invfo_rdwr, .fo_truncate = invfo_truncate, .fo_ioctl = kqueue_ioctl, .fo_poll = kqueue_poll, .fo_kqfilter = kqueue_kqfilter, .fo_stat = kqueue_stat, .fo_close = kqueue_close, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, .fo_fill_kinfo = kqueue_fill_kinfo, }; static int knote_attach(struct knote *kn, struct kqueue *kq); static void knote_drop(struct knote *kn, struct thread *td); static void knote_enqueue(struct knote *kn); static void knote_dequeue(struct knote *kn); static void knote_init(void); static struct knote *knote_alloc(int waitok); static void knote_free(struct knote *kn); static void filt_kqdetach(struct knote *kn); static int filt_kqueue(struct knote *kn, long hint); static int filt_procattach(struct knote *kn); static void filt_procdetach(struct knote *kn); static int filt_proc(struct knote *kn, long hint); static int filt_fileattach(struct knote *kn); static void filt_timerexpire(void *knx); static int filt_timerattach(struct knote *kn); static void filt_timerdetach(struct knote *kn); static int filt_timer(struct knote *kn, long hint); static int filt_userattach(struct knote *kn); static void filt_userdetach(struct knote *kn); static int filt_user(struct knote *kn, long hint); static void filt_usertouch(struct knote *kn, struct kevent *kev, u_long type); static struct filterops file_filtops = { .f_isfd = 1, .f_attach = filt_fileattach, }; static struct filterops kqread_filtops = { .f_isfd = 1, .f_detach = filt_kqdetach, .f_event = filt_kqueue, }; /* XXX - move to kern_proc.c? */ static struct filterops proc_filtops = { .f_isfd = 0, .f_attach = filt_procattach, .f_detach = filt_procdetach, .f_event = filt_proc, }; static struct filterops timer_filtops = { .f_isfd = 0, .f_attach = filt_timerattach, .f_detach = filt_timerdetach, .f_event = filt_timer, }; static struct filterops user_filtops = { .f_attach = filt_userattach, .f_detach = filt_userdetach, .f_event = filt_user, .f_touch = filt_usertouch, }; static uma_zone_t knote_zone; static atomic_uint kq_ncallouts = ATOMIC_VAR_INIT(0); static unsigned int kq_calloutmax = 4 * 1024; SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); /* XXX - ensure not KN_INFLUX?? */ #define KNOTE_ACTIVATE(kn, islock) do { \ if ((islock)) \ mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED); \ else \ KQ_LOCK((kn)->kn_kq); \ (kn)->kn_status |= KN_ACTIVE; \ if (((kn)->kn_status & (KN_QUEUED | KN_DISABLED)) == 0) \ knote_enqueue((kn)); \ if (!(islock)) \ KQ_UNLOCK((kn)->kn_kq); \ } while(0) #define KQ_LOCK(kq) do { \ mtx_lock(&(kq)->kq_lock); \ } while (0) #define KQ_FLUX_WAKEUP(kq) do { \ if (((kq)->kq_state & KQ_FLUXWAIT) == KQ_FLUXWAIT) { \ (kq)->kq_state &= ~KQ_FLUXWAIT; \ wakeup((kq)); \ } \ } while (0) #define KQ_UNLOCK_FLUX(kq) do { \ KQ_FLUX_WAKEUP(kq); \ mtx_unlock(&(kq)->kq_lock); \ } while (0) #define KQ_UNLOCK(kq) do { \ mtx_unlock(&(kq)->kq_lock); \ } while (0) #define KQ_OWNED(kq) do { \ mtx_assert(&(kq)->kq_lock, MA_OWNED); \ } while (0) #define KQ_NOTOWNED(kq) do { \ mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \ } while (0) -#define KN_LIST_LOCK(kn) do { \ - if (kn->kn_knlist != NULL) \ - kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg); \ -} while (0) -#define KN_LIST_UNLOCK(kn) do { \ - if (kn->kn_knlist != NULL) \ - kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg); \ -} while (0) + +static struct knlist * +kn_list_lock(struct knote *kn) +{ + struct knlist *knl; + + knl = kn->kn_knlist; + if (knl != NULL) + knl->kl_lock(knl->kl_lockarg); + return (knl); +} + +static void +kn_list_unlock(struct knlist *knl) +{ + bool do_free; + + if (knl == NULL) + return; + do_free = knl->kl_autodestroy && knlist_empty(knl); + knl->kl_unlock(knl->kl_lockarg); + if (do_free) { + knlist_destroy(knl); + free(knl, M_KQUEUE); + } +} + #define KNL_ASSERT_LOCK(knl, islocked) do { \ if (islocked) \ KNL_ASSERT_LOCKED(knl); \ else \ KNL_ASSERT_UNLOCKED(knl); \ } while (0) #ifdef INVARIANTS #define KNL_ASSERT_LOCKED(knl) do { \ knl->kl_assert_locked((knl)->kl_lockarg); \ } while (0) #define KNL_ASSERT_UNLOCKED(knl) do { \ knl->kl_assert_unlocked((knl)->kl_lockarg); \ } while (0) #else /* !INVARIANTS */ #define KNL_ASSERT_LOCKED(knl) do {} while(0) #define KNL_ASSERT_UNLOCKED(knl) do {} while (0) #endif /* INVARIANTS */ #ifndef KN_HASHSIZE #define KN_HASHSIZE 64 /* XXX should be tunable */ #endif #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) static int filt_nullattach(struct knote *kn) { return (ENXIO); }; struct filterops null_filtops = { .f_isfd = 0, .f_attach = filt_nullattach, }; /* XXX - make SYSINIT to add these, and move into respective modules. */ extern struct filterops sig_filtops; extern struct filterops fs_filtops; /* * Table for for all system-defined filters. */ static struct mtx filterops_lock; MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops", MTX_DEF); static struct { struct filterops *for_fop; int for_nolock; int for_refcnt; } sysfilt_ops[EVFILT_SYSCOUNT] = { { &file_filtops, 1 }, /* EVFILT_READ */ { &file_filtops, 1 }, /* EVFILT_WRITE */ { &null_filtops }, /* EVFILT_AIO */ { &file_filtops, 1 }, /* EVFILT_VNODE */ { &proc_filtops, 1 }, /* EVFILT_PROC */ { &sig_filtops, 1 }, /* EVFILT_SIGNAL */ { &timer_filtops, 1 }, /* EVFILT_TIMER */ { &file_filtops, 1 }, /* EVFILT_PROCDESC */ { &fs_filtops, 1 }, /* EVFILT_FS */ { &null_filtops }, /* EVFILT_LIO */ { &user_filtops, 1 }, /* EVFILT_USER */ { &null_filtops }, /* EVFILT_SENDFILE */ }; /* * Simple redirection for all cdevsw style objects to call their fo_kqfilter * method. */ static int filt_fileattach(struct knote *kn) { return (fo_kqfilter(kn->kn_fp, kn)); } /*ARGSUSED*/ static int kqueue_kqfilter(struct file *fp, struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; if (kn->kn_filter != EVFILT_READ) return (EINVAL); kn->kn_status |= KN_KQUEUE; kn->kn_fop = &kqread_filtops; knlist_add(&kq->kq_sel.si_note, kn, 0); return (0); } static void filt_kqdetach(struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; knlist_remove(&kq->kq_sel.si_note, kn, 0); } /*ARGSUSED*/ static int filt_kqueue(struct knote *kn, long hint) { struct kqueue *kq = kn->kn_fp->f_data; kn->kn_data = kq->kq_count; return (kn->kn_data > 0); } /* XXX - move to kern_proc.c? */ static int filt_procattach(struct knote *kn) { struct proc *p; - int immediate; int error; + bool exiting, immediate; - immediate = 0; + exiting = immediate = false; p = pfind(kn->kn_id); if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) { p = zpfind(kn->kn_id); - immediate = 1; + exiting = true; } else if (p != NULL && (p->p_flag & P_WEXIT)) { - immediate = 1; + exiting = true; } if (p == NULL) return (ESRCH); if ((error = p_cansee(curthread, p))) { PROC_UNLOCK(p); return (error); } kn->kn_ptr.p_proc = p; kn->kn_flags |= EV_CLEAR; /* automatically set */ /* * Internal flag indicating registration done by kernel for the * purposes of getting a NOTE_CHILD notification. */ if (kn->kn_flags & EV_FLAG2) { kn->kn_flags &= ~EV_FLAG2; kn->kn_data = kn->kn_sdata; /* ppid */ kn->kn_fflags = NOTE_CHILD; - kn->kn_sfflags &= ~NOTE_EXIT; - immediate = 1; /* Force immediate activation of child note. */ + kn->kn_sfflags &= ~(NOTE_EXIT | NOTE_EXEC | NOTE_FORK); + immediate = true; /* Force immediate activation of child note. */ } /* * Internal flag indicating registration done by kernel (for other than * NOTE_CHILD). */ if (kn->kn_flags & EV_FLAG1) { kn->kn_flags &= ~EV_FLAG1; } - if (immediate == 0) - knlist_add(&p->p_klist, kn, 1); + knlist_add(p->p_klist, kn, 1); /* * Immediately activate any child notes or, in the case of a zombie * target process, exit notes. The latter is necessary to handle the * case where the target process, e.g. a child, dies before the kevent * is registered. */ - if (immediate && filt_proc(kn, NOTE_EXIT)) + if (immediate || (exiting && filt_proc(kn, NOTE_EXIT))) KNOTE_ACTIVATE(kn, 0); PROC_UNLOCK(p); return (0); } /* * The knote may be attached to a different process, which may exit, * leaving nothing for the knote to be attached to. So when the process * exits, the knote is marked as DETACHED and also flagged as ONESHOT so * it will be deleted when read out. However, as part of the knote deletion, * this routine is called, so a check is needed to avoid actually performing * a detach, because the original process does not exist any more. */ /* XXX - move to kern_proc.c? */ static void filt_procdetach(struct knote *kn) { - struct proc *p; - p = kn->kn_ptr.p_proc; - knlist_remove(&p->p_klist, kn, 0); + knlist_remove(kn->kn_knlist, kn, 0); kn->kn_ptr.p_proc = NULL; } /* XXX - move to kern_proc.c? */ static int filt_proc(struct knote *kn, long hint) { struct proc *p; u_int event; p = kn->kn_ptr.p_proc; /* Mask off extra data. */ event = (u_int)hint & NOTE_PCTRLMASK; /* If the user is interested in this event, record it. */ if (kn->kn_sfflags & event) kn->kn_fflags |= event; /* Process is gone, so flag the event as finished. */ if (event == NOTE_EXIT) { - if (!(kn->kn_status & KN_DETACHED)) - knlist_remove_inevent(&p->p_klist, kn); kn->kn_flags |= EV_EOF | EV_ONESHOT; kn->kn_ptr.p_proc = NULL; if (kn->kn_fflags & NOTE_EXIT) kn->kn_data = KW_EXITCODE(p->p_xexit, p->p_xsig); if (kn->kn_fflags == 0) kn->kn_flags |= EV_DROP; return (1); } return (kn->kn_fflags != 0); } /* * Called when the process forked. It mostly does the same as the * knote(), activating all knotes registered to be activated when the * process forked. Additionally, for each knote attached to the * parent, check whether user wants to track the new process. If so * attach a new knote to it, and immediately report an event with the * child's pid. */ void knote_fork(struct knlist *list, int pid) { struct kqueue *kq; struct knote *kn; struct kevent kev; int error; if (list == NULL) return; list->kl_lock(list->kl_lockarg); SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { - /* - * XXX - Why do we skip the kn if it is _INFLUX? Does this - * mean we will not properly wake up some notes? - */ - if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) - continue; kq = kn->kn_kq; KQ_LOCK(kq); if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { KQ_UNLOCK(kq); continue; } /* * The same as knote(), activate the event. */ if ((kn->kn_sfflags & NOTE_TRACK) == 0) { kn->kn_status |= KN_HASKQLOCK; if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 1); kn->kn_status &= ~KN_HASKQLOCK; KQ_UNLOCK(kq); continue; } /* * The NOTE_TRACK case. In addition to the activation * of the event, we need to register new events to * track the child. Drop the locks in preparation for * the call to kqueue_register(). */ kn->kn_status |= KN_INFLUX; KQ_UNLOCK(kq); list->kl_unlock(list->kl_lockarg); /* * Activate existing knote and register tracking knotes with * new process. * * First register a knote to get just the child notice. This * must be a separate note from a potential NOTE_EXIT * notification since both NOTE_CHILD and NOTE_EXIT are defined * to use the data field (in conflicting ways). */ kev.ident = pid; kev.filter = kn->kn_filter; - kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | EV_FLAG2; + kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | + EV_FLAG2; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata;/* preserve udata */ error = kqueue_register(kq, &kev, NULL, 0); if (error) kn->kn_fflags |= NOTE_TRACKERR; /* * Then register another knote to track other potential events * from the new process. */ kev.ident = pid; kev.filter = kn->kn_filter; kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1; kev.fflags = kn->kn_sfflags; kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata;/* preserve udata */ error = kqueue_register(kq, &kev, NULL, 0); if (error) kn->kn_fflags |= NOTE_TRACKERR; if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 0); KQ_LOCK(kq); kn->kn_status &= ~KN_INFLUX; KQ_UNLOCK_FLUX(kq); list->kl_lock(list->kl_lockarg); } list->kl_unlock(list->kl_lockarg); } /* * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the * interval timer support code. */ #define NOTE_TIMER_PRECMASK (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS| \ NOTE_NSECONDS) static sbintime_t timer2sbintime(intptr_t data, int flags) { /* * Macros for converting to the fractional second portion of an * sbintime_t using 64bit multiplication to improve precision. */ #define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32) #define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32) #define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32) switch (flags & NOTE_TIMER_PRECMASK) { case NOTE_SECONDS: #ifdef __LP64__ if (data > (SBT_MAX / SBT_1S)) return SBT_MAX; #endif return ((sbintime_t)data << 32); case NOTE_MSECONDS: /* FALLTHROUGH */ case 0: if (data >= 1000) { int64_t secs = data / 1000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return SBT_MAX; #endif return (secs << 32 | MS_TO_SBT(data % 1000)); } return MS_TO_SBT(data); case NOTE_USECONDS: if (data >= 1000000) { int64_t secs = data / 1000000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return SBT_MAX; #endif return (secs << 32 | US_TO_SBT(data % 1000000)); } return US_TO_SBT(data); case NOTE_NSECONDS: if (data >= 1000000000) { int64_t secs = data / 1000000000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) return SBT_MAX; #endif return (secs << 32 | US_TO_SBT(data % 1000000000)); } return NS_TO_SBT(data); default: break; } return (-1); } static void filt_timerexpire(void *knx) { struct callout *calloutp; struct knote *kn; kn = knx; kn->kn_data++; KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) { calloutp = (struct callout *)kn->kn_hook; *kn->kn_ptr.p_nexttime += timer2sbintime(kn->kn_sdata, kn->kn_sfflags); callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0, filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); } } /* * data contains amount of time to sleep */ static int filt_timerattach(struct knote *kn) { struct callout *calloutp; sbintime_t to; unsigned int ncallouts; if ((intptr_t)kn->kn_sdata < 0) return (EINVAL); if ((intptr_t)kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) kn->kn_sdata = 1; /* Only precision unit are supported in flags so far */ if (kn->kn_sfflags & ~NOTE_TIMER_PRECMASK) return (EINVAL); to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags); if (to < 0) return (EINVAL); ncallouts = atomic_load_explicit(&kq_ncallouts, memory_order_relaxed); do { if (ncallouts >= kq_calloutmax) return (ENOMEM); } while (!atomic_compare_exchange_weak_explicit(&kq_ncallouts, &ncallouts, ncallouts + 1, memory_order_relaxed, memory_order_relaxed)); kn->kn_flags |= EV_CLEAR; /* automatically set */ kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ kn->kn_ptr.p_nexttime = malloc(sizeof(sbintime_t), M_KQUEUE, M_WAITOK); calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK); callout_init(calloutp, 1); kn->kn_hook = calloutp; *kn->kn_ptr.p_nexttime = to + sbinuptime(); callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0, filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); return (0); } static void filt_timerdetach(struct knote *kn) { struct callout *calloutp; unsigned int old; calloutp = (struct callout *)kn->kn_hook; callout_drain(calloutp); free(calloutp, M_KQUEUE); free(kn->kn_ptr.p_nexttime, M_KQUEUE); old = atomic_fetch_sub_explicit(&kq_ncallouts, 1, memory_order_relaxed); KASSERT(old > 0, ("Number of callouts cannot become negative")); kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */ } static int filt_timer(struct knote *kn, long hint) { return (kn->kn_data != 0); } static int filt_userattach(struct knote *kn) { /* * EVFILT_USER knotes are not attached to anything in the kernel. */ kn->kn_hook = NULL; if (kn->kn_fflags & NOTE_TRIGGER) kn->kn_hookid = 1; else kn->kn_hookid = 0; return (0); } static void filt_userdetach(__unused struct knote *kn) { /* * EVFILT_USER knotes are not attached to anything in the kernel. */ } static int filt_user(struct knote *kn, __unused long hint) { return (kn->kn_hookid); } static void filt_usertouch(struct knote *kn, struct kevent *kev, u_long type) { u_int ffctrl; switch (type) { case EVENT_REGISTER: if (kev->fflags & NOTE_TRIGGER) kn->kn_hookid = 1; ffctrl = kev->fflags & NOTE_FFCTRLMASK; kev->fflags &= NOTE_FFLAGSMASK; switch (ffctrl) { case NOTE_FFNOP: break; case NOTE_FFAND: kn->kn_sfflags &= kev->fflags; break; case NOTE_FFOR: kn->kn_sfflags |= kev->fflags; break; case NOTE_FFCOPY: kn->kn_sfflags = kev->fflags; break; default: /* XXX Return error? */ break; } kn->kn_sdata = kev->data; if (kev->flags & EV_CLEAR) { kn->kn_hookid = 0; kn->kn_data = 0; kn->kn_fflags = 0; } break; case EVENT_PROCESS: *kev = kn->kn_kevent; kev->fflags = kn->kn_sfflags; kev->data = kn->kn_sdata; if (kn->kn_flags & EV_CLEAR) { kn->kn_hookid = 0; kn->kn_data = 0; kn->kn_fflags = 0; } break; default: panic("filt_usertouch() - invalid type (%ld)", type); break; } } int sys_kqueue(struct thread *td, struct kqueue_args *uap) { return (kern_kqueue(td, 0, NULL)); } static void kqueue_init(struct kqueue *kq) { mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK); TAILQ_INIT(&kq->kq_head); knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); } int kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps) { struct filedesc *fdp; struct kqueue *kq; struct file *fp; struct ucred *cred; int fd, error; fdp = td->td_proc->p_fd; cred = td->td_ucred; if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES))) return (ENOMEM); error = falloc_caps(td, &fp, &fd, flags, fcaps); if (error != 0) { chgkqcnt(cred->cr_ruidinfo, -1, 0); return (error); } /* An extra reference on `fp' has been held for us by falloc(). */ kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); kqueue_init(kq); kq->kq_fdp = fdp; kq->kq_cred = crhold(cred); FILEDESC_XLOCK(fdp); TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); FILEDESC_XUNLOCK(fdp); finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); fdrop(fp, td); td->td_retval[0] = fd; return (0); } #ifndef _SYS_SYSPROTO_H_ struct kevent_args { int fd; const struct kevent *changelist; int nchanges; struct kevent *eventlist; int nevents; const struct timespec *timeout; }; #endif int sys_kevent(struct thread *td, struct kevent_args *uap) { struct timespec ts, *tsp; struct kevent_copyops k_ops = { uap, kevent_copyout, kevent_copyin}; int error; #ifdef KTRACE struct uio ktruio; struct iovec ktriov; struct uio *ktruioin = NULL; struct uio *ktruioout = NULL; #endif if (uap->timeout != NULL) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) return (error); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_GENIO)) { ktriov.iov_base = uap->changelist; ktriov.iov_len = uap->nchanges * sizeof(struct kevent); ktruio = (struct uio){ .uio_iov = &ktriov, .uio_iovcnt = 1, .uio_segflg = UIO_USERSPACE, .uio_rw = UIO_READ, .uio_td = td }; ktruioin = cloneuio(&ktruio); ktriov.iov_base = uap->eventlist; ktriov.iov_len = uap->nevents * sizeof(struct kevent); ktruioout = cloneuio(&ktruio); } #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); #ifdef KTRACE if (ktruioin != NULL) { ktruioin->uio_resid = uap->nchanges * sizeof(struct kevent); ktrgenio(uap->fd, UIO_WRITE, ktruioin, 0); ktruioout->uio_resid = td->td_retval[0] * sizeof(struct kevent); ktrgenio(uap->fd, UIO_READ, ktruioout, error); } #endif return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int kevent_copyout(void *arg, struct kevent *kevp, int count) { struct kevent_args *uap; int error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct kevent_args *)arg; error = copyout(kevp, uap->eventlist, count * sizeof *kevp); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int kevent_copyin(void *arg, struct kevent *kevp, int count) { struct kevent_args *uap; int error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct kevent_args *)arg; error = copyin(uap->changelist, kevp, count * sizeof *kevp); if (error == 0) uap->changelist += count; return (error); } int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { cap_rights_t rights; struct file *fp; int error; cap_rights_init(&rights); if (nchanges > 0) cap_rights_set(&rights, CAP_KQUEUE_CHANGE); if (nevents > 0) cap_rights_set(&rights, CAP_KQUEUE_EVENT); error = fget(td, fd, &rights, &fp); if (error != 0) return (error); error = kern_kevent_fp(td, fp, nchanges, nevents, k_ops, timeout); fdrop(fp, td); return (error); } static int kqueue_kevent(struct kqueue *kq, struct thread *td, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { struct kevent keva[KQ_NEVENTS]; struct kevent *kevp, *changes; int i, n, nerrors, error; nerrors = 0; while (nchanges > 0) { n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges; error = k_ops->k_copyin(k_ops->arg, keva, n); if (error) return (error); changes = keva; for (i = 0; i < n; i++) { kevp = &changes[i]; if (!kevp->filter) continue; kevp->flags &= ~EV_SYSFLAGS; error = kqueue_register(kq, kevp, td, 1); if (error || (kevp->flags & EV_RECEIPT)) { if (nevents == 0) return (error); kevp->flags = EV_ERROR; kevp->data = error; (void)k_ops->k_copyout(k_ops->arg, kevp, 1); nevents--; nerrors++; } } nchanges -= n; } if (nerrors) { td->td_retval[0] = nerrors; return (0); } return (kqueue_scan(kq, nevents, k_ops, timeout, keva, td)); } int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { struct kqueue *kq; int error; error = kqueue_acquire(fp, &kq); if (error != 0) return (error); error = kqueue_kevent(kq, td, nchanges, nevents, k_ops, timeout); kqueue_release(kq, 0); return (error); } /* * Performs a kevent() call on a temporarily created kqueue. This can be * used to perform one-shot polling, similar to poll() and select(). */ int kern_kevent_anonymous(struct thread *td, int nevents, struct kevent_copyops *k_ops) { struct kqueue kq = {}; int error; kqueue_init(&kq); kq.kq_refcnt = 1; error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL); kqueue_drain(&kq, td); kqueue_destroy(&kq); return (error); } int kqueue_add_filteropts(int filt, struct filterops *filtops) { int error; error = 0; if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) { printf( "trying to add a filterop that is out of range: %d is beyond %d\n", ~filt, EVFILT_SYSCOUNT); return EINVAL; } mtx_lock(&filterops_lock); if (sysfilt_ops[~filt].for_fop != &null_filtops && sysfilt_ops[~filt].for_fop != NULL) error = EEXIST; else { sysfilt_ops[~filt].for_fop = filtops; sysfilt_ops[~filt].for_refcnt = 0; } mtx_unlock(&filterops_lock); return (error); } int kqueue_del_filteropts(int filt) { int error; error = 0; if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return EINVAL; mtx_lock(&filterops_lock); if (sysfilt_ops[~filt].for_fop == &null_filtops || sysfilt_ops[~filt].for_fop == NULL) error = EINVAL; else if (sysfilt_ops[~filt].for_refcnt != 0) error = EBUSY; else { sysfilt_ops[~filt].for_fop = &null_filtops; sysfilt_ops[~filt].for_refcnt = 0; } mtx_unlock(&filterops_lock); return error; } static struct filterops * kqueue_fo_find(int filt) { if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return NULL; if (sysfilt_ops[~filt].for_nolock) return sysfilt_ops[~filt].for_fop; mtx_lock(&filterops_lock); sysfilt_ops[~filt].for_refcnt++; if (sysfilt_ops[~filt].for_fop == NULL) sysfilt_ops[~filt].for_fop = &null_filtops; mtx_unlock(&filterops_lock); return sysfilt_ops[~filt].for_fop; } static void kqueue_fo_release(int filt) { if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return; if (sysfilt_ops[~filt].for_nolock) return; mtx_lock(&filterops_lock); KASSERT(sysfilt_ops[~filt].for_refcnt > 0, ("filter object refcount not valid on release")); sysfilt_ops[~filt].for_refcnt--; mtx_unlock(&filterops_lock); } /* * A ref to kq (obtained via kqueue_acquire) must be held. waitok will * influence if memory allocation should wait. Make sure it is 0 if you * hold any mutexes. */ static int kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok) { struct filterops *fops; struct file *fp; struct knote *kn, *tkn; + struct knlist *knl; cap_rights_t rights; int error, filt, event; int haskqglobal, filedesc_unlock; if ((kev->flags & (EV_ENABLE | EV_DISABLE)) == (EV_ENABLE | EV_DISABLE)) return (EINVAL); fp = NULL; kn = NULL; + knl = NULL; error = 0; haskqglobal = 0; filedesc_unlock = 0; filt = kev->filter; fops = kqueue_fo_find(filt); if (fops == NULL) return EINVAL; if (kev->flags & EV_ADD) { /* * Prevent waiting with locks. Non-sleepable * allocation failures are handled in the loop, only * if the spare knote appears to be actually required. */ tkn = knote_alloc(waitok); } else { tkn = NULL; } findkn: if (fops->f_isfd) { KASSERT(td != NULL, ("td is NULL")); error = fget(td, kev->ident, cap_rights_init(&rights, CAP_EVENT), &fp); if (error) goto done; if ((kev->flags & EV_ADD) == EV_ADD && kqueue_expand(kq, fops, kev->ident, 0) != 0) { /* try again */ fdrop(fp, td); fp = NULL; error = kqueue_expand(kq, fops, kev->ident, waitok); if (error) goto done; goto findkn; } if (fp->f_type == DTYPE_KQUEUE) { /* * If we add some intelligence about what we are doing, * we should be able to support events on ourselves. * We need to know when we are doing this to prevent * getting both the knlist lock and the kq lock since * they are the same thing. */ if (fp->f_data == kq) { error = EINVAL; goto done; } /* * Pre-lock the filedesc before the global * lock mutex, see the comment in * kqueue_close(). */ FILEDESC_XLOCK(td->td_proc->p_fd); filedesc_unlock = 1; KQ_GLOBAL_LOCK(&kq_global, haskqglobal); } KQ_LOCK(kq); if (kev->ident < kq->kq_knlistsize) { SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) if (kev->filter == kn->kn_filter) break; } } else { if ((kev->flags & EV_ADD) == EV_ADD) kqueue_expand(kq, fops, kev->ident, waitok); KQ_LOCK(kq); /* * If possible, find an existing knote to use for this kevent. */ if (kev->filter == EVFILT_PROC && (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) { /* This is an internal creation of a process tracking * note. Don't attempt to coalesce this with an * existing note. */ ; } else if (kq->kq_knhashmask != 0) { struct klist *list; list = &kq->kq_knhash[ KN_HASH((u_long)kev->ident, kq->kq_knhashmask)]; SLIST_FOREACH(kn, list, kn_link) if (kev->ident == kn->kn_id && kev->filter == kn->kn_filter) break; } } /* knote is in the process of changing, wait for it to stabilize. */ if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) { KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) { FILEDESC_XUNLOCK(td->td_proc->p_fd); filedesc_unlock = 0; } kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0); if (fp != NULL) { fdrop(fp, td); fp = NULL; } goto findkn; } /* * kn now contains the matching knote, or NULL if no match */ if (kn == NULL) { if (kev->flags & EV_ADD) { kn = tkn; tkn = NULL; if (kn == NULL) { KQ_UNLOCK(kq); error = ENOMEM; goto done; } kn->kn_fp = fp; kn->kn_kq = kq; kn->kn_fop = fops; /* * apply reference counts to knote structure, and * do not release it at the end of this routine. */ fops = NULL; fp = NULL; kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; kev->fflags = 0; kev->data = 0; kn->kn_kevent = *kev; kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE | EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT); kn->kn_status = KN_INFLUX|KN_DETACHED; error = knote_attach(kn, kq); KQ_UNLOCK(kq); if (error != 0) { tkn = kn; goto done; } if ((error = kn->kn_fop->f_attach(kn)) != 0) { knote_drop(kn, td); goto done; } - KN_LIST_LOCK(kn); + knl = kn_list_lock(kn); goto done_ev_add; } else { /* No matching knote and the EV_ADD flag is not set. */ KQ_UNLOCK(kq); error = ENOENT; goto done; } } if (kev->flags & EV_DELETE) { kn->kn_status |= KN_INFLUX; KQ_UNLOCK(kq); if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); knote_drop(kn, td); goto done; } if (kev->flags & EV_FORCEONESHOT) { kn->kn_flags |= EV_ONESHOT; KNOTE_ACTIVATE(kn, 1); } /* * The user may change some filter values after the initial EV_ADD, * but doing so will not reset any filter which has already been * triggered. */ kn->kn_status |= KN_INFLUX | KN_SCAN; KQ_UNLOCK(kq); - KN_LIST_LOCK(kn); + knl = kn_list_lock(kn); kn->kn_kevent.udata = kev->udata; if (!fops->f_isfd && fops->f_touch != NULL) { fops->f_touch(kn, kev, EVENT_REGISTER); } else { kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; } /* * We can get here with kn->kn_knlist == NULL. This can happen when * the initial attach event decides that the event is "completed" * already. i.e. filt_procattach is called on a zombie process. It * will call filt_proc which will remove it from the list, and NULL * kn_knlist. */ done_ev_add: if ((kev->flags & EV_ENABLE) != 0) kn->kn_status &= ~KN_DISABLED; else if ((kev->flags & EV_DISABLE) != 0) kn->kn_status |= KN_DISABLED; if ((kn->kn_status & KN_DISABLED) == 0) event = kn->kn_fop->f_event(kn, 0); else event = 0; KQ_LOCK(kq); if (event) kn->kn_status |= KN_ACTIVE; if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) == KN_ACTIVE) knote_enqueue(kn); kn->kn_status &= ~(KN_INFLUX | KN_SCAN); - KN_LIST_UNLOCK(kn); + kn_list_unlock(knl); KQ_UNLOCK_FLUX(kq); done: KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) FILEDESC_XUNLOCK(td->td_proc->p_fd); if (fp != NULL) fdrop(fp, td); knote_free(tkn); if (fops != NULL) kqueue_fo_release(filt); return (error); } static int kqueue_acquire(struct file *fp, struct kqueue **kqp) { int error; struct kqueue *kq; error = 0; kq = fp->f_data; if (fp->f_type != DTYPE_KQUEUE || kq == NULL) return (EBADF); *kqp = kq; KQ_LOCK(kq); if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) { KQ_UNLOCK(kq); return (EBADF); } kq->kq_refcnt++; KQ_UNLOCK(kq); return error; } static void kqueue_release(struct kqueue *kq, int locked) { if (locked) KQ_OWNED(kq); else KQ_LOCK(kq); kq->kq_refcnt--; if (kq->kq_refcnt == 1) wakeup(&kq->kq_refcnt); if (!locked) KQ_UNLOCK(kq); } static void kqueue_schedtask(struct kqueue *kq) { KQ_OWNED(kq); KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN), ("scheduling kqueue task while draining")); if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) { taskqueue_enqueue(taskqueue_kqueue_ctx, &kq->kq_task); kq->kq_state |= KQ_TASKSCHED; } } /* * Expand the kq to make sure we have storage for fops/ident pair. * * Return 0 on success (or no work necessary), return errno on failure. * * Not calling hashinit w/ waitok (proper malloc flag) should be safe. * If kqueue_register is called from a non-fd context, there usually/should * be no locks held. */ static int kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, int waitok) { struct klist *list, *tmp_knhash, *to_free; u_long tmp_knhashmask; int size; int fd; int mflag = waitok ? M_WAITOK : M_NOWAIT; KQ_NOTOWNED(kq); to_free = NULL; if (fops->f_isfd) { fd = ident; if (kq->kq_knlistsize <= fd) { size = kq->kq_knlistsize; while (size <= fd) size += KQEXTENT; list = malloc(size * sizeof(*list), M_KQUEUE, mflag); if (list == NULL) return ENOMEM; KQ_LOCK(kq); if (kq->kq_knlistsize > fd) { to_free = list; list = NULL; } else { if (kq->kq_knlist != NULL) { bcopy(kq->kq_knlist, list, kq->kq_knlistsize * sizeof(*list)); to_free = kq->kq_knlist; kq->kq_knlist = NULL; } bzero((caddr_t)list + kq->kq_knlistsize * sizeof(*list), (size - kq->kq_knlistsize) * sizeof(*list)); kq->kq_knlistsize = size; kq->kq_knlist = list; } KQ_UNLOCK(kq); } } else { if (kq->kq_knhashmask == 0) { tmp_knhash = hashinit(KN_HASHSIZE, M_KQUEUE, &tmp_knhashmask); if (tmp_knhash == NULL) return ENOMEM; KQ_LOCK(kq); if (kq->kq_knhashmask == 0) { kq->kq_knhash = tmp_knhash; kq->kq_knhashmask = tmp_knhashmask; } else { to_free = tmp_knhash; } KQ_UNLOCK(kq); } } free(to_free, M_KQUEUE); KQ_NOTOWNED(kq); return 0; } static void kqueue_task(void *arg, int pending) { struct kqueue *kq; int haskqglobal; haskqglobal = 0; kq = arg; KQ_GLOBAL_LOCK(&kq_global, haskqglobal); KQ_LOCK(kq); KNOTE_LOCKED(&kq->kq_sel.si_note, 0); kq->kq_state &= ~KQ_TASKSCHED; if ((kq->kq_state & KQ_TASKDRAIN) == KQ_TASKDRAIN) { wakeup(&kq->kq_state); } KQ_UNLOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); } /* * Scan, update kn_data (if not ONESHOT), and copyout triggered events. * We treat KN_MARKER knotes as if they are INFLUX. */ static int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, const struct timespec *tsp, struct kevent *keva, struct thread *td) { struct kevent *kevp; struct knote *kn, *marker; + struct knlist *knl; sbintime_t asbt, rsbt; int count, error, haskqglobal, influx, nkev, touch; count = maxevents; nkev = 0; error = 0; haskqglobal = 0; if (maxevents == 0) goto done_nl; rsbt = 0; if (tsp != NULL) { if (tsp->tv_sec < 0 || tsp->tv_nsec < 0 || tsp->tv_nsec >= 1000000000) { error = EINVAL; goto done_nl; } if (timespecisset(tsp)) { if (tsp->tv_sec <= INT32_MAX) { rsbt = tstosbt(*tsp); if (TIMESEL(&asbt, rsbt)) asbt += tc_tick_sbt; if (asbt <= SBT_MAX - rsbt) asbt += rsbt; else asbt = 0; rsbt >>= tc_precexp; } else asbt = 0; } else asbt = -1; } else asbt = 0; marker = knote_alloc(1); marker->kn_status = KN_MARKER; KQ_LOCK(kq); retry: kevp = keva; if (kq->kq_count == 0) { if (asbt == -1) { error = EWOULDBLOCK; } else { kq->kq_state |= KQ_SLEEP; error = msleep_sbt(kq, &kq->kq_lock, PSOCK | PCATCH, "kqread", asbt, rsbt, C_ABSOLUTE); } if (error == 0) goto retry; /* don't restart after signals... */ if (error == ERESTART) error = EINTR; else if (error == EWOULDBLOCK) error = 0; goto done; } TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe); influx = 0; while (count) { KQ_OWNED(kq); kn = TAILQ_FIRST(&kq->kq_head); if ((kn->kn_status == KN_MARKER && kn != marker) || (kn->kn_status & KN_INFLUX) == KN_INFLUX) { if (influx) { influx = 0; KQ_FLUX_WAKEUP(kq); } kq->kq_state |= KQ_FLUXWAIT; error = msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); continue; } TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); if ((kn->kn_status & KN_DISABLED) == KN_DISABLED) { kn->kn_status &= ~KN_QUEUED; kq->kq_count--; continue; } if (kn == marker) { KQ_FLUX_WAKEUP(kq); if (count == maxevents) goto retry; goto done; } KASSERT((kn->kn_status & KN_INFLUX) == 0, ("KN_INFLUX set when not suppose to be")); if ((kn->kn_flags & EV_DROP) == EV_DROP) { kn->kn_status &= ~KN_QUEUED; kn->kn_status |= KN_INFLUX; kq->kq_count--; KQ_UNLOCK(kq); /* * We don't need to lock the list since we've marked * it _INFLUX. */ if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); knote_drop(kn, td); KQ_LOCK(kq); continue; } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { kn->kn_status &= ~KN_QUEUED; kn->kn_status |= KN_INFLUX; kq->kq_count--; KQ_UNLOCK(kq); /* * We don't need to lock the list since we've marked * it _INFLUX. */ *kevp = kn->kn_kevent; if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); knote_drop(kn, td); KQ_LOCK(kq); kn = NULL; } else { kn->kn_status |= KN_INFLUX | KN_SCAN; KQ_UNLOCK(kq); if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) KQ_GLOBAL_LOCK(&kq_global, haskqglobal); - KN_LIST_LOCK(kn); + knl = kn_list_lock(kn); if (kn->kn_fop->f_event(kn, 0) == 0) { KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX | KN_SCAN); kq->kq_count--; - KN_LIST_UNLOCK(kn); + kn_list_unlock(knl); influx = 1; continue; } touch = (!kn->kn_fop->f_isfd && kn->kn_fop->f_touch != NULL); if (touch) kn->kn_fop->f_touch(kn, kevp, EVENT_PROCESS); else *kevp = kn->kn_kevent; KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { /* * Manually clear knotes who weren't * 'touch'ed. */ if (touch == 0 && kn->kn_flags & EV_CLEAR) { kn->kn_data = 0; kn->kn_fflags = 0; } if (kn->kn_flags & EV_DISPATCH) kn->kn_status |= KN_DISABLED; kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE); kq->kq_count--; } else TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~(KN_INFLUX | KN_SCAN); - KN_LIST_UNLOCK(kn); + kn_list_unlock(knl); influx = 1; } /* we are returning a copy to the user */ kevp++; nkev++; count--; if (nkev == KQ_NEVENTS) { influx = 0; KQ_UNLOCK_FLUX(kq); error = k_ops->k_copyout(k_ops->arg, keva, nkev); nkev = 0; kevp = keva; KQ_LOCK(kq); if (error) break; } } TAILQ_REMOVE(&kq->kq_head, marker, kn_tqe); done: KQ_OWNED(kq); KQ_UNLOCK_FLUX(kq); knote_free(marker); done_nl: KQ_NOTOWNED(kq); if (nkev != 0) error = k_ops->k_copyout(k_ops->arg, keva, nkev); td->td_retval[0] = maxevents - count; return (error); } /*ARGSUSED*/ static int kqueue_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, struct thread *td) { /* * Enabling sigio causes two major problems: * 1) infinite recursion: * Synopsys: kevent is being used to track signals and have FIOASYNC * set. On receipt of a signal this will cause a kqueue to recurse * into itself over and over. Sending the sigio causes the kqueue * to become ready, which in turn posts sigio again, forever. * Solution: this can be solved by setting a flag in the kqueue that * we have a SIGIO in progress. * 2) locking problems: * Synopsys: Kqueue is a leaf subsystem, but adding signalling puts * us above the proc and pgrp locks. * Solution: Post a signal using an async mechanism, being sure to * record a generation count in the delivery so that we do not deliver * a signal to the wrong process. * * Note, these two mechanisms are somewhat mutually exclusive! */ #if 0 struct kqueue *kq; kq = fp->f_data; switch (cmd) { case FIOASYNC: if (*(int *)data) { kq->kq_state |= KQ_ASYNC; } else { kq->kq_state &= ~KQ_ASYNC; } return (0); case FIOSETOWN: return (fsetown(*(int *)data, &kq->kq_sigio)); case FIOGETOWN: *(int *)data = fgetown(&kq->kq_sigio); return (0); } #endif return (ENOTTY); } /*ARGSUSED*/ static int kqueue_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { struct kqueue *kq; int revents = 0; int error; if ((error = kqueue_acquire(fp, &kq))) return POLLERR; KQ_LOCK(kq); if (events & (POLLIN | POLLRDNORM)) { if (kq->kq_count) { revents |= events & (POLLIN | POLLRDNORM); } else { selrecord(td, &kq->kq_sel); if (SEL_WAITING(&kq->kq_sel)) kq->kq_state |= KQ_SEL; } } kqueue_release(kq, 1); KQ_UNLOCK(kq); return (revents); } /*ARGSUSED*/ static int kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred, struct thread *td) { bzero((void *)st, sizeof *st); /* * We no longer return kq_count because the unlocked value is useless. * If you spent all this time getting the count, why not spend your * syscall better by calling kevent? * * XXX - This is needed for libc_r. */ st->st_mode = S_IFIFO; return (0); } static void kqueue_drain(struct kqueue *kq, struct thread *td) { struct knote *kn; int i; KQ_LOCK(kq); KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING, ("kqueue already closing")); kq->kq_state |= KQ_CLOSING; if (kq->kq_refcnt > 1) msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0); KASSERT(kq->kq_refcnt == 1, ("other refs are out there!")); KASSERT(knlist_empty(&kq->kq_sel.si_note), ("kqueue's knlist not empty")); for (i = 0; i < kq->kq_knlistsize; i++) { while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) { if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0); continue; } kn->kn_status |= KN_INFLUX; KQ_UNLOCK(kq); if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); knote_drop(kn, td); KQ_LOCK(kq); } } if (kq->kq_knhashmask != 0) { for (i = 0; i <= kq->kq_knhashmask; i++) { while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) { if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo2", 0); continue; } kn->kn_status |= KN_INFLUX; KQ_UNLOCK(kq); if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); knote_drop(kn, td); KQ_LOCK(kq); } } } if ((kq->kq_state & KQ_TASKSCHED) == KQ_TASKSCHED) { kq->kq_state |= KQ_TASKDRAIN; msleep(&kq->kq_state, &kq->kq_lock, PSOCK, "kqtqdr", 0); } if ((kq->kq_state & KQ_SEL) == KQ_SEL) { selwakeuppri(&kq->kq_sel, PSOCK); if (!SEL_WAITING(&kq->kq_sel)) kq->kq_state &= ~KQ_SEL; } KQ_UNLOCK(kq); } static void kqueue_destroy(struct kqueue *kq) { KASSERT(kq->kq_fdp == NULL, ("kqueue still attached to a file descriptor")); seldrain(&kq->kq_sel); knlist_destroy(&kq->kq_sel.si_note); mtx_destroy(&kq->kq_lock); if (kq->kq_knhash != NULL) free(kq->kq_knhash, M_KQUEUE); if (kq->kq_knlist != NULL) free(kq->kq_knlist, M_KQUEUE); funsetown(&kq->kq_sigio); } /*ARGSUSED*/ static int kqueue_close(struct file *fp, struct thread *td) { struct kqueue *kq = fp->f_data; struct filedesc *fdp; int error; int filedesc_unlock; if ((error = kqueue_acquire(fp, &kq))) return error; kqueue_drain(kq, td); /* * We could be called due to the knote_drop() doing fdrop(), * called from kqueue_register(). In this case the global * lock is owned, and filedesc sx is locked before, to not * take the sleepable lock after non-sleepable. */ fdp = kq->kq_fdp; kq->kq_fdp = NULL; if (!sx_xlocked(FILEDESC_LOCK(fdp))) { FILEDESC_XLOCK(fdp); filedesc_unlock = 1; } else filedesc_unlock = 0; TAILQ_REMOVE(&fdp->fd_kqlist, kq, kq_list); if (filedesc_unlock) FILEDESC_XUNLOCK(fdp); kqueue_destroy(kq); chgkqcnt(kq->kq_cred->cr_ruidinfo, -1, 0); crfree(kq->kq_cred); free(kq, M_KQUEUE); fp->f_data = NULL; return (0); } static int kqueue_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) { kif->kf_type = KF_TYPE_KQUEUE; return (0); } static void kqueue_wakeup(struct kqueue *kq) { KQ_OWNED(kq); if ((kq->kq_state & KQ_SLEEP) == KQ_SLEEP) { kq->kq_state &= ~KQ_SLEEP; wakeup(kq); } if ((kq->kq_state & KQ_SEL) == KQ_SEL) { selwakeuppri(&kq->kq_sel, PSOCK); if (!SEL_WAITING(&kq->kq_sel)) kq->kq_state &= ~KQ_SEL; } if (!knlist_empty(&kq->kq_sel.si_note)) kqueue_schedtask(kq); if ((kq->kq_state & KQ_ASYNC) == KQ_ASYNC) { pgsigio(&kq->kq_sigio, SIGIO, 0); } } /* * Walk down a list of knotes, activating them if their event has triggered. * * There is a possibility to optimize in the case of one kq watching another. * Instead of scheduling a task to wake it up, you could pass enough state * down the chain to make up the parent kqueue. Make this code functional * first. */ void knote(struct knlist *list, long hint, int lockflags) { struct kqueue *kq; struct knote *kn, *tkn; int error; if (list == NULL) return; KNL_ASSERT_LOCK(list, lockflags & KNF_LISTLOCKED); if ((lockflags & KNF_LISTLOCKED) == 0) list->kl_lock(list->kl_lockarg); /* * If we unlock the list lock (and set KN_INFLUX), we can * eliminate the kqueue scheduling, but this will introduce * four lock/unlock's for each knote to test. Also, marker * would be needed to keep iteration position, since filters * or other threads could remove events. */ SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) { kq = kn->kn_kq; KQ_LOCK(kq); if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { /* * Do not process the influx notes, except for * the influx coming from the kq unlock in the * kqueue_scan(). In the later case, we do * not interfere with the scan, since the code * fragment in kqueue_scan() locks the knlist, * and cannot proceed until we finished. */ KQ_UNLOCK(kq); } else if ((lockflags & KNF_NOKQLOCK) != 0) { kn->kn_status |= KN_INFLUX; KQ_UNLOCK(kq); error = kn->kn_fop->f_event(kn, hint); KQ_LOCK(kq); kn->kn_status &= ~KN_INFLUX; if (error) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK_FLUX(kq); } else { kn->kn_status |= KN_HASKQLOCK; if (kn->kn_fop->f_event(kn, hint)) KNOTE_ACTIVATE(kn, 1); kn->kn_status &= ~KN_HASKQLOCK; KQ_UNLOCK(kq); } } if ((lockflags & KNF_LISTLOCKED) == 0) list->kl_unlock(list->kl_lockarg); } /* * add a knote to a knlist */ void knlist_add(struct knlist *knl, struct knote *kn, int islocked) { KNL_ASSERT_LOCK(knl, islocked); KQ_NOTOWNED(kn->kn_kq); KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED")); if (!islocked) knl->kl_lock(knl->kl_lockarg); SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext); if (!islocked) knl->kl_unlock(knl->kl_lockarg); KQ_LOCK(kn->kn_kq); kn->kn_knlist = knl; kn->kn_status &= ~KN_DETACHED; KQ_UNLOCK(kn->kn_kq); } static void -knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) +knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, + int kqislocked) { KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked")); KNL_ASSERT_LOCK(knl, knlislocked); mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED); if (!kqislocked) KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX, ("knlist_remove called w/o knote being KN_INFLUX or already removed")); if (!knlislocked) knl->kl_lock(knl->kl_lockarg); SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); kn->kn_knlist = NULL; if (!knlislocked) - knl->kl_unlock(knl->kl_lockarg); + kn_list_unlock(knl); if (!kqislocked) KQ_LOCK(kn->kn_kq); kn->kn_status |= KN_DETACHED; if (!kqislocked) KQ_UNLOCK(kn->kn_kq); } /* * remove knote from the specified knlist */ void knlist_remove(struct knlist *knl, struct knote *kn, int islocked) { knlist_remove_kq(knl, kn, islocked, 0); } -/* - * remove knote from the specified knlist while in f_event handler. - */ -void -knlist_remove_inevent(struct knlist *knl, struct knote *kn) -{ - - knlist_remove_kq(knl, kn, 1, - (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK); -} - int knlist_empty(struct knlist *knl) { KNL_ASSERT_LOCKED(knl); return SLIST_EMPTY(&knl->kl_list); } static struct mtx knlist_lock; MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects", MTX_DEF); static void knlist_mtx_lock(void *arg); static void knlist_mtx_unlock(void *arg); static void knlist_mtx_lock(void *arg) { mtx_lock((struct mtx *)arg); } static void knlist_mtx_unlock(void *arg) { mtx_unlock((struct mtx *)arg); } static void knlist_mtx_assert_locked(void *arg) { mtx_assert((struct mtx *)arg, MA_OWNED); } static void knlist_mtx_assert_unlocked(void *arg) { mtx_assert((struct mtx *)arg, MA_NOTOWNED); } static void knlist_rw_rlock(void *arg) { rw_rlock((struct rwlock *)arg); } static void knlist_rw_runlock(void *arg) { rw_runlock((struct rwlock *)arg); } static void knlist_rw_assert_locked(void *arg) { rw_assert((struct rwlock *)arg, RA_LOCKED); } static void knlist_rw_assert_unlocked(void *arg) { rw_assert((struct rwlock *)arg, RA_UNLOCKED); } void knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), void (*kl_unlock)(void *), void (*kl_assert_locked)(void *), void (*kl_assert_unlocked)(void *)) { if (lock == NULL) knl->kl_lockarg = &knlist_lock; else knl->kl_lockarg = lock; if (kl_lock == NULL) knl->kl_lock = knlist_mtx_lock; else knl->kl_lock = kl_lock; if (kl_unlock == NULL) knl->kl_unlock = knlist_mtx_unlock; else knl->kl_unlock = kl_unlock; if (kl_assert_locked == NULL) knl->kl_assert_locked = knlist_mtx_assert_locked; else knl->kl_assert_locked = kl_assert_locked; if (kl_assert_unlocked == NULL) knl->kl_assert_unlocked = knlist_mtx_assert_unlocked; else knl->kl_assert_unlocked = kl_assert_unlocked; + knl->kl_autodestroy = 0; SLIST_INIT(&knl->kl_list); } void knlist_init_mtx(struct knlist *knl, struct mtx *lock) { knlist_init(knl, lock, NULL, NULL, NULL, NULL); } +struct knlist * +knlist_alloc(struct mtx *lock) +{ + struct knlist *knl; + + knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK); + knlist_init_mtx(knl, lock); + return (knl); +} + void knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock) { knlist_init(knl, lock, knlist_rw_rlock, knlist_rw_runlock, knlist_rw_assert_locked, knlist_rw_assert_unlocked); } void knlist_destroy(struct knlist *knl) { #ifdef INVARIANTS /* * if we run across this error, we need to find the offending * driver and have it call knlist_clear or knlist_delete. */ if (!SLIST_EMPTY(&knl->kl_list)) printf("WARNING: destroying knlist w/ knotes on it!\n"); #endif knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL; SLIST_INIT(&knl->kl_list); } +void +knlist_detach(struct knlist *knl) +{ + + KNL_ASSERT_LOCKED(knl); + knl->kl_autodestroy = 1; + if (knlist_empty(knl)) { + knlist_destroy(knl); + free(knl, M_KQUEUE); + } +} + /* * Even if we are locked, we may need to drop the lock to allow any influx * knotes time to "settle". */ void knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) { struct knote *kn, *kn2; struct kqueue *kq; + KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl)); if (islocked) KNL_ASSERT_LOCKED(knl); else { KNL_ASSERT_UNLOCKED(knl); again: /* need to reacquire lock since we have dropped it */ knl->kl_lock(knl->kl_lockarg); } SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) { kq = kn->kn_kq; KQ_LOCK(kq); if ((kn->kn_status & KN_INFLUX)) { KQ_UNLOCK(kq); continue; } knlist_remove_kq(knl, kn, 1, 1); if (killkn) { kn->kn_status |= KN_INFLUX | KN_DETACHED; KQ_UNLOCK(kq); knote_drop(kn, td); } else { /* Make sure cleared knotes disappear soon */ kn->kn_flags |= (EV_EOF | EV_ONESHOT); KQ_UNLOCK(kq); } kq = NULL; } if (!SLIST_EMPTY(&knl->kl_list)) { /* there are still KN_INFLUX remaining */ kn = SLIST_FIRST(&knl->kl_list); kq = kn->kn_kq; KQ_LOCK(kq); KASSERT(kn->kn_status & KN_INFLUX, ("knote removed w/o list lock")); knl->kl_unlock(knl->kl_lockarg); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0); kq = NULL; goto again; } if (islocked) KNL_ASSERT_LOCKED(knl); else { knl->kl_unlock(knl->kl_lockarg); KNL_ASSERT_UNLOCKED(knl); } } /* * Remove all knotes referencing a specified fd must be called with FILEDESC * lock. This prevents a race where a new fd comes along and occupies the * entry and we attach a knote to the fd. */ void knote_fdclose(struct thread *td, int fd) { struct filedesc *fdp = td->td_proc->p_fd; struct kqueue *kq; struct knote *kn; int influx; FILEDESC_XLOCK_ASSERT(fdp); /* * We shouldn't have to worry about new kevents appearing on fd * since filedesc is locked. */ TAILQ_FOREACH(kq, &fdp->fd_kqlist, kq_list) { KQ_LOCK(kq); again: influx = 0; while (kq->kq_knlistsize > fd && (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) { if (kn->kn_status & KN_INFLUX) { /* someone else might be waiting on our knote */ if (influx) wakeup(kq); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); goto again; } kn->kn_status |= KN_INFLUX; KQ_UNLOCK(kq); if (!(kn->kn_status & KN_DETACHED)) kn->kn_fop->f_detach(kn); knote_drop(kn, td); influx = 1; KQ_LOCK(kq); } KQ_UNLOCK_FLUX(kq); } } static int knote_attach(struct knote *kn, struct kqueue *kq) { struct klist *list; KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked INFLUX")); KQ_OWNED(kq); if (kn->kn_fop->f_isfd) { if (kn->kn_id >= kq->kq_knlistsize) return ENOMEM; list = &kq->kq_knlist[kn->kn_id]; } else { if (kq->kq_knhash == NULL) return ENOMEM; list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; } SLIST_INSERT_HEAD(list, kn, kn_link); return 0; } /* * knote must already have been detached using the f_detach method. * no lock need to be held, it is assumed that the KN_INFLUX flag is set * to prevent other removal. */ static void knote_drop(struct knote *kn, struct thread *td) { struct kqueue *kq; struct klist *list; kq = kn->kn_kq; KQ_NOTOWNED(kq); KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX, ("knote_drop called without KN_INFLUX set in kn_status")); KQ_LOCK(kq); if (kn->kn_fop->f_isfd) list = &kq->kq_knlist[kn->kn_id]; else list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; if (!SLIST_EMPTY(list)) SLIST_REMOVE(list, kn, knote, kn_link); if (kn->kn_status & KN_QUEUED) knote_dequeue(kn); KQ_UNLOCK_FLUX(kq); if (kn->kn_fop->f_isfd) { fdrop(kn->kn_fp, td); kn->kn_fp = NULL; } kqueue_fo_release(kn->kn_kevent.filter); kn->kn_fop = NULL; knote_free(kn); } static void knote_enqueue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; KQ_OWNED(kn->kn_kq); KASSERT((kn->kn_status & KN_QUEUED) == 0, ("knote already queued")); TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status |= KN_QUEUED; kq->kq_count++; kqueue_wakeup(kq); } static void knote_dequeue(struct knote *kn) { struct kqueue *kq = kn->kn_kq; KQ_OWNED(kn->kn_kq); KASSERT(kn->kn_status & KN_QUEUED, ("knote not queued")); TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~KN_QUEUED; kq->kq_count--; } static void knote_init(void) { knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL); static struct knote * knote_alloc(int waitok) { return (uma_zalloc(knote_zone, (waitok ? M_WAITOK : M_NOWAIT) | M_ZERO)); } static void knote_free(struct knote *kn) { uma_zfree(knote_zone, kn); } /* * Register the kev w/ the kq specified by fd. */ int kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok) { struct kqueue *kq; struct file *fp; cap_rights_t rights; int error; error = fget(td, fd, cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &fp); if (error != 0) return (error); if ((error = kqueue_acquire(fp, &kq)) != 0) goto noacquire; error = kqueue_register(kq, kev, td, waitok); kqueue_release(kq, 0); noacquire: fdrop(fp, td); return error; } Index: projects/vnet/sys/kern/kern_exec.c =================================================================== --- projects/vnet/sys/kern/kern_exec.c (revision 302276) +++ projects/vnet/sys/kern/kern_exec.c (revision 302277) @@ -1,1630 +1,1630 @@ /*- * Copyright (c) 1993, David Greenman * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #ifdef HWPMC_HOOKS #include #endif #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exec; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, , , exec, "char *"); SDT_PROBE_DEFINE1(proc, , , exec__failure, "int"); SDT_PROBE_DEFINE1(proc, , , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); int coredump_pack_fileinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, &coredump_pack_fileinfo, 0, "Enable file path packing in 'procstat -f' coredump notes"); int coredump_pack_vmmapinfo = 1; SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN, &coredump_pack_vmmapinfo, 0, "Enable file path packing in 'procstat -v' coredump notes"); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); static int do_execve(struct thread *td, struct image_args *args, struct mac *mac_p); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD, NULL, 0, sysctl_kern_ps_strings, "LU", ""); /* XXX This should be vm_size_t. */ SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD| CTLFLAG_CAPRD, NULL, 0, sysctl_kern_usrstack, "LU", ""); SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD, NULL, 0, sysctl_kern_stackprot, "I", ""); u_long ps_arg_cache_limit = PAGE_SIZE / 16; SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, &ps_arg_cache_limit, 0, ""); static int disallow_high_osrel; SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW, &disallow_high_osrel, 0, "Disallow execution of binaries built for higher version of the world"); static int map_at_zero = 0; SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0, "Permit processes to map an object at virtual address 0."); static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_psstrings; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_psstrings, sizeof(p->p_sysent->sv_psstrings)); return error; } static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS) { struct proc *p; int error; p = curproc; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { unsigned int val; val = (unsigned int)p->p_sysent->sv_usrstack; error = SYSCTL_OUT(req, &val, sizeof(val)); } else #endif error = SYSCTL_OUT(req, &p->p_sysent->sv_usrstack, sizeof(p->p_sysent->sv_usrstack)); return error; } static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS) { struct proc *p; p = curproc; return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot, sizeof(p->p_sysent->sv_stackprot))); } /* * Each of the items is a pointer to a `const struct execsw', hence the * double pointer here. */ static const struct execsw **execsw; #ifndef _SYS_SYSPROTO_H_ struct execve_args { char *fname; char **argv; char **envv; }; #endif int sys_execve(struct thread *td, struct execve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL); post_execve(td, error, oldvmspace); return (error); } #ifndef _SYS_SYSPROTO_H_ struct fexecve_args { int fd; char **argv; char **envv; } #endif int sys_fexecve(struct thread *td, struct fexecve_args *uap) { struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL); } post_execve(td, error, oldvmspace); return (error); } #ifndef _SYS_SYSPROTO_H_ struct __mac_execve_args { char *fname; char **argv; char **envv; struct mac *mac_p; }; #endif int sys___mac_execve(struct thread *td, struct __mac_execve_args *uap) { #ifdef MAC struct image_args args; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p); post_execve(td, error, oldvmspace); return (error); #else return (ENOSYS); #endif } int pre_execve(struct thread *td, struct vmspace **oldvmspace) { struct proc *p; int error; KASSERT(td == curthread, ("non-current thread %p", td)); error = 0; p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); if (thread_single(p, SINGLE_BOUNDARY) != 0) error = ERESTART; PROC_UNLOCK(p); } KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); *oldvmspace = p->p_vmspace; return (error); } void post_execve(struct thread *td, int error, struct vmspace *oldvmspace) { struct proc *p; KASSERT(td == curthread, ("non-current thread %p", td)); p = td->td_proc; if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); /* * If success, we upgrade to SINGLE_EXIT state to * force other threads to suicide. */ if (error == 0) thread_single(p, SINGLE_EXIT); else thread_single_end(p, SINGLE_BOUNDARY); PROC_UNLOCK(p); } if ((td->td_pflags & TDP_EXECVMSPC) != 0) { KASSERT(p->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } } /* * XXX: kern_execve has the astonishing property of not always returning to * the caller. If sufficiently bad things happen during the call to * do_execve(), it can end up calling exit1(); as a result, callers must * avoid doing anything which they might need to undo (e.g., allocating * memory). */ int kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p) { AUDIT_ARG_ARGV(args->begin_argv, args->argc, args->begin_envv - args->begin_argv); AUDIT_ARG_ENVV(args->begin_envv, args->envc, args->endp - args->begin_envv); return (do_execve(td, args, mac_p)); } /* * In-kernel implementation of execve(). All arguments are assumed to be * userspace pointers from the passed thread. */ static int do_execve(td, args, mac_p) struct thread *td; struct image_args *args; struct mac *mac_p; { struct proc *p = td->td_proc; struct nameidata nd; struct ucred *oldcred; struct uidinfo *euip = NULL; register_t *stack_base; int error, i; struct image_params image_params, *imgp; struct vattr attr; int (*img_first)(struct image_params *); struct pargs *oldargs = NULL, *newargs = NULL; struct sigacts *oldsigacts = NULL, *newsigacts = NULL; #ifdef KTRACE struct vnode *tracevp = NULL; struct ucred *tracecred = NULL; #endif struct vnode *oldtextvp = NULL, *newtextvp; cap_rights_t rights; int credential_changing; int textset; #ifdef MAC struct label *interpvplabel = NULL; int will_transition; #endif #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif static const char fexecv_proc_title[] = "(fexecv)"; imgp = &image_params; /* * Lock the process and set the P_INEXEC flag to indicate that * it should be left alone until we're done here. This is * necessary to avoid race conditions - e.g. in ptrace() - * that might allow a local user to illicitly obtain elevated * privileges. */ PROC_LOCK(p); KASSERT((p->p_flag & P_INEXEC) == 0, ("%s(): process already has P_INEXEC flag", __func__)); p->p_flag |= P_INEXEC; PROC_UNLOCK(p); /* * Initialize part of the common data */ bzero(imgp, sizeof(*imgp)); imgp->proc = p; imgp->attr = &attr; imgp->args = args; oldcred = p->p_ucred; #ifdef MAC error = mac_execve_enter(imgp, mac_p); if (error) goto exec_fail; #endif /* * Translate the file name. namei() returns a vnode pointer * in ni_vp among other things. * * XXXAUDIT: It would be desirable to also audit the name of the * interpreter if this is an interpreted binary. */ if (args->fname != NULL) { NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | FOLLOW | SAVENAME | AUDITVNODE1, UIO_SYSSPACE, args->fname, td); } SDT_PROBE1(proc, , , exec, args->fname); interpret: if (args->fname != NULL) { #ifdef CAPABILITY_MODE /* * While capability mode can't reach this point via direct * path arguments to execve(), we also don't allow * interpreters to be used in capability mode (for now). * Catch indirect lookups and return a permissions error. */ if (IN_CAPABILITY_MODE(td)) { error = ECAPMODE; goto exec_fail; } #endif error = namei(&nd); if (error) goto exec_fail; newtextvp = nd.ni_vp; imgp->vp = newtextvp; } else { AUDIT_ARG_FD(args->fd); /* * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, cap_rights_init(&rights, CAP_FEXECVE), &newtextvp); if (error) goto exec_fail; vn_lock(newtextvp, LK_EXCLUSIVE | LK_RETRY); AUDIT_ARG_VNODE1(newtextvp); imgp->vp = newtextvp; } /* * Check file permissions (also 'opens' file) */ error = exec_check_permissions(imgp); if (error) goto exec_fail_dealloc; imgp->object = imgp->vp->v_object; if (imgp->object != NULL) vm_object_reference(imgp->object); /* * Set VV_TEXT now so no one can write to the executable while we're * activating it. * * Remember if this was set before and unset it in case this is not * actually an executable image. */ textset = VOP_IS_TEXT(imgp->vp); VOP_SET_TEXT(imgp->vp); error = exec_map_first_page(imgp); if (error) goto exec_fail_dealloc; imgp->proc->p_osrel = 0; /* * Implement image setuid/setgid. * * Determine new credentials before attempting image activators * so that it can be used by process_exec handlers to determine * credential/setid changes. * * Don't honor setuid/setgid if the filesystem prohibits it or if * the process is being traced. * * We disable setuid/setgid/etc in capability mode on the basis * that most setugid applications are not written with that * environment in mind, and will therefore almost certainly operate * incorrectly. In principle there's no reason that setugid * applications might not be useful in capability mode, so we may want * to reconsider this conservative design choice in the future. * * XXXMAC: For the time being, use NOSUID to also prohibit * transitions on the file system. */ credential_changing = 0; credential_changing |= (attr.va_mode & S_ISUID) && oldcred->cr_uid != attr.va_uid; credential_changing |= (attr.va_mode & S_ISGID) && oldcred->cr_gid != attr.va_gid; #ifdef MAC will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp, interpvplabel, imgp); credential_changing |= will_transition; #endif if (credential_changing && #ifdef CAPABILITY_MODE ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) && #endif (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 && (p->p_flag & P_TRACED) == 0) { imgp->credential_setid = true; VOP_UNLOCK(imgp->vp, 0); imgp->newcred = crdup(oldcred); if (attr.va_mode & S_ISUID) { euip = uifind(attr.va_uid); change_euid(imgp->newcred, euip); } vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); if (attr.va_mode & S_ISGID) change_egid(imgp->newcred, attr.va_gid); /* * Implement correct POSIX saved-id behavior. * * XXXMAC: Note that the current logic will save the * uid and gid if a MAC domain transition occurs, even * though maybe it shouldn't. */ change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } else { /* * Implement correct POSIX saved-id behavior. * * XXX: It's not clear that the existing behavior is * POSIX-compliant. A number of sources indicate that the * saved uid/gid should only be updated if the new ruid is * not equal to the old ruid, or the new euid is not equal * to the old euid and the new euid is not equal to the old * ruid. The FreeBSD code always updates the saved uid/gid. * Also, this code uses the new (replaced) euid and egid as * the source, which may or may not be the right ones to use. */ if (oldcred->cr_svuid != oldcred->cr_uid || oldcred->cr_svgid != oldcred->cr_gid) { VOP_UNLOCK(imgp->vp, 0); imgp->newcred = crdup(oldcred); vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); change_svuid(imgp->newcred, imgp->newcred->cr_uid); change_svgid(imgp->newcred, imgp->newcred->cr_gid); } } /* The new credentials are installed into the process later. */ /* * Do the best to calculate the full path to the image file. */ if (args->fname != NULL && args->fname[0] == '/') imgp->execpath = args->fname; else { VOP_UNLOCK(imgp->vp, 0); if (vn_fullpath(td, imgp->vp, &imgp->execpath, &imgp->freepath) != 0) imgp->execpath = args->fname; vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY); } /* * If the current process has a special image activator it * wants to try first, call it. For example, emulating shell * scripts differently. */ error = -1; if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL) error = img_first(imgp); /* * Loop through the list of image activators, calling each one. * An activator returns -1 if there is no match, 0 on success, * and an error otherwise. */ for (i = 0; error == -1 && execsw[i]; ++i) { if (execsw[i]->ex_imgact == NULL || execsw[i]->ex_imgact == img_first) { continue; } error = (*execsw[i]->ex_imgact)(imgp); } if (error) { if (error == -1) { if (textset == 0) VOP_UNSET_TEXT(imgp->vp); error = ENOEXEC; } goto exec_fail_dealloc; } /* * Special interpreter operation, cleanup and loop up to try to * activate the interpreter. */ if (imgp->interpreted) { exec_unmap_first_page(imgp); /* * VV_TEXT needs to be unset for scripts. There is a short * period before we determine that something is a script where * VV_TEXT will be set. The vnode lock is held over this * entire period so nothing should illegitimately be blocked. */ VOP_UNSET_TEXT(imgp->vp); /* free name buffer and old vnode */ if (args->fname != NULL) NDFREE(&nd, NDF_ONLY_PNBUF); #ifdef MAC mac_execve_interpreter_enter(newtextvp, &interpvplabel); #endif if (imgp->opened) { VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td); imgp->opened = 0; } vput(newtextvp); vm_object_deallocate(imgp->object); imgp->object = NULL; imgp->credential_setid = false; if (imgp->newcred != NULL) { crfree(imgp->newcred); imgp->newcred = NULL; } imgp->execpath = NULL; free(imgp->freepath, M_TEMP); imgp->freepath = NULL; /* set new name to that of the interpreter */ NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW | SAVENAME, UIO_SYSSPACE, imgp->interpreter_name, td); args->fname = imgp->interpreter_name; goto interpret; } /* * NB: We unlock the vnode here because it is believed that none * of the sv_copyout_strings/sv_fixup operations require the vnode. */ VOP_UNLOCK(imgp->vp, 0); if (disallow_high_osrel && P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) { error = ENOEXEC; uprintf("Osrel %d for image %s too high\n", p->p_osrel, imgp->execpath != NULL ? imgp->execpath : ""); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); goto exec_fail_dealloc; } /* ABI enforces the use of Capsicum. Switch into capabilities mode. */ if (SV_PROC_FLAG(p, SV_CAPSICUM)) sys_cap_enter(td, NULL); /* * Copy out strings (args and env) and initialize stack base */ if (p->p_sysent->sv_copyout_strings) stack_base = (*p->p_sysent->sv_copyout_strings)(imgp); else stack_base = exec_copyout_strings(imgp); /* * If custom stack fixup routine present for this process * let it do the stack setup. * Else stuff argument count as first item on stack */ if (p->p_sysent->sv_fixup != NULL) (*p->p_sysent->sv_fixup)(&stack_base, imgp); else suword(--stack_base, imgp->args->argc); if (args->fdp != NULL) { /* Install a brand new file descriptor table. */ fdinstall_remapped(td, args->fdp); args->fdp = NULL; } else { /* * Keep on using the existing file descriptor table. For * security and other reasons, the file descriptor table * cannot be shared after an exec. */ fdunshare(td); /* close files on exec */ fdcloseexec(td); } /* * Malloc things before we need locks. */ i = imgp->args->begin_envv - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { newargs = pargs_alloc(i); bcopy(imgp->args->begin_argv, newargs->ar_args, i); } /* * For security and other reasons, signal handlers cannot * be shared after an exec. The new process gets a copy of the old * handlers. In execsigs(), the new process will have its signals * reset. */ if (sigacts_shared(p->p_sigacts)) { oldsigacts = p->p_sigacts; newsigacts = sigacts_alloc(); sigacts_copy(newsigacts, oldsigacts); } vn_lock(imgp->vp, LK_SHARED | LK_RETRY); PROC_LOCK(p); if (oldsigacts) p->p_sigacts = newsigacts; /* Stop profiling */ stopprofclock(p); /* reset caught signals */ execsigs(p); /* name this process - nameiexec(p, ndp) */ bzero(p->p_comm, sizeof(p->p_comm)); if (args->fname) bcopy(nd.ni_cnd.cn_nameptr, p->p_comm, min(nd.ni_cnd.cn_namelen, MAXCOMLEN)); else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0) bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title)); bcopy(p->p_comm, td->td_name, sizeof(td->td_name)); #ifdef KTR sched_clear_tdname(td); #endif /* * mark as execed, wakeup the process that vforked (if any) and tell * it that it now has its own resources back */ p->p_flag |= P_EXEC; if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0) p->p_flag2 &= ~P2_NOTRACE; if (p->p_flag & P_PPWAIT) { p->p_flag &= ~(P_PPWAIT | P_PPTRACE); cv_broadcast(&p->p_pwait); } /* * Implement image setuid/setgid installation. */ if (imgp->credential_setid) { /* * Turn off syscall tracing for set-id programs, except for * root. Record any set-id flags first to make sure that * we do not regain any tracing during a possible block. */ setsugid(p); #ifdef KTRACE if (p->p_tracecred != NULL && priv_check_cred(p->p_tracecred, PRIV_DEBUG_DIFFCRED, 0)) ktrprocexec(p, &tracecred, &tracevp); #endif /* * Close any file descriptors 0..2 that reference procfs, * then make sure file descriptors 0..2 are in use. * * Both fdsetugidsafety() and fdcheckstd() may call functions * taking sleepable locks, so temporarily drop our locks. */ PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); fdsetugidsafety(td); error = fdcheckstd(td); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); if (error != 0) goto exec_fail_dealloc; PROC_LOCK(p); #ifdef MAC if (will_transition) { mac_vnode_execve_transition(oldcred, imgp->newcred, imgp->vp, interpvplabel, imgp); } #endif } else { if (oldcred->cr_uid == oldcred->cr_ruid && oldcred->cr_gid == oldcred->cr_rgid) p->p_flag &= ~P_SUGID; } /* * Set the new credentials. */ if (imgp->newcred != NULL) { proc_set_cred(p, imgp->newcred); crfree(oldcred); oldcred = NULL; } /* * Store the vp for use in procfs. This vnode was referenced by namei * or fgetvp_exec. */ oldtextvp = p->p_textvp; p->p_textvp = newtextvp; #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exec if it * has declared an interest. */ if (dtrace_fasttrap_exec) dtrace_fasttrap_exec(p); #endif /* * Notify others that we exec'd, and clear the P_INEXEC flag * as we're now a bona fide freshly-execed process. */ - KNOTE_LOCKED(&p->p_klist, NOTE_EXEC); + KNOTE_LOCKED(p->p_klist, NOTE_EXEC); p->p_flag &= ~P_INEXEC; /* clear "fork but no exec" flag, as we _are_ execing */ p->p_acflag &= ~AFORK; /* * Free any previous argument cache and replace it with * the new argument cache, if any. */ oldargs = p->p_args; p->p_args = newargs; newargs = NULL; #ifdef HWPMC_HOOKS /* * Check if system-wide sampling is in effect or if the * current process is using PMCs. If so, do exec() time * processing. This processing needs to happen AFTER the * P_INEXEC flag is cleared. * * The proc lock needs to be released before taking the PMC * SX. */ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) { PROC_UNLOCK(p); VOP_UNLOCK(imgp->vp, 0); pe.pm_credentialschanged = credential_changing; pe.pm_entryaddr = imgp->entry_addr; PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe); vn_lock(imgp->vp, LK_SHARED | LK_RETRY); } else PROC_UNLOCK(p); #else /* !HWPMC_HOOKS */ PROC_UNLOCK(p); #endif /* Set values passed into the program in registers. */ if (p->p_sysent->sv_setregs) (*p->p_sysent->sv_setregs)(td, imgp, (u_long)(uintptr_t)stack_base); else exec_setregs(td, imgp, (u_long)(uintptr_t)stack_base); vfs_mark_atime(imgp->vp, td->td_ucred); SDT_PROBE1(proc, , , exec__success, args->fname); exec_fail_dealloc: if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); if (imgp->vp != NULL) { if (args->fname) NDFREE(&nd, NDF_ONLY_PNBUF); if (imgp->opened) VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td); if (error != 0) vput(imgp->vp); else VOP_UNLOCK(imgp->vp, 0); } if (imgp->object != NULL) vm_object_deallocate(imgp->object); free(imgp->freepath, M_TEMP); if (error == 0) { PROC_LOCK(p); td->td_dbgflags |= TDB_EXEC; PROC_UNLOCK(p); /* * Stop the process here if its stop event mask has * the S_EXEC bit set. */ STOPEVENT(p, S_EXEC, 0); } else { exec_fail: /* we're done here, clear P_INEXEC */ PROC_LOCK(p); p->p_flag &= ~P_INEXEC; PROC_UNLOCK(p); SDT_PROBE1(proc, , , exec__failure, error); } if (imgp->newcred != NULL && oldcred != NULL) crfree(imgp->newcred); #ifdef MAC mac_execve_exit(imgp); mac_execve_interpreter_exit(interpvplabel); #endif exec_free_args(args); /* * Handle deferred decrement of ref counts. */ if (oldtextvp != NULL) vrele(oldtextvp); #ifdef KTRACE if (tracevp != NULL) vrele(tracevp); if (tracecred != NULL) crfree(tracecred); #endif pargs_drop(oldargs); pargs_drop(newargs); if (oldsigacts != NULL) sigacts_free(oldsigacts); if (euip != NULL) uifree(euip); if (error && imgp->vmspace_destroyed) { /* sorry, no more process anymore. exit gracefully */ exit1(td, 0, SIGABRT); /* NOT REACHED */ } #ifdef KTRACE if (error == 0) ktrprocctor(p); #endif return (error); } int exec_map_first_page(imgp) struct image_params *imgp; { int rv, i, after, initial_pagein; vm_page_t ma[VM_INITIAL_PAGEIN]; vm_object_t object; if (imgp->firstpage != NULL) exec_unmap_first_page(imgp); object = imgp->vp->v_object; if (object == NULL) return (EACCES); VM_OBJECT_WLOCK(object); #if VM_NRESERVLEVEL > 0 vm_object_color(object, 0); #endif ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL); if (ma[0]->valid != VM_PAGE_BITS_ALL) { if (!vm_pager_has_page(object, 0, NULL, &after)) { vm_page_lock(ma[0]); vm_page_free(ma[0]); vm_page_unlock(ma[0]); vm_page_xunbusy(ma[0]); VM_OBJECT_WUNLOCK(object); return (EIO); } initial_pagein = min(after, VM_INITIAL_PAGEIN); KASSERT(initial_pagein <= object->size, ("%s: initial_pagein %d object->size %ju", __func__, initial_pagein, (uintmax_t )object->size)); for (i = 1; i < initial_pagein; i++) { if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) { if (ma[i]->valid) break; if (vm_page_tryxbusy(ma[i])) break; } else { ma[i] = vm_page_alloc(object, i, VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED); if (ma[i] == NULL) break; } } initial_pagein = i; rv = vm_pager_get_pages(object, ma, initial_pagein, NULL, NULL); if (rv != VM_PAGER_OK) { for (i = 0; i < initial_pagein; i++) { vm_page_lock(ma[i]); vm_page_free(ma[i]); vm_page_unlock(ma[i]); vm_page_xunbusy(ma[i]); } VM_OBJECT_WUNLOCK(object); return (EIO); } for (i = 1; i < initial_pagein; i++) vm_page_readahead_finish(ma[i]); } vm_page_xunbusy(ma[0]); vm_page_lock(ma[0]); vm_page_hold(ma[0]); vm_page_activate(ma[0]); vm_page_unlock(ma[0]); VM_OBJECT_WUNLOCK(object); imgp->firstpage = sf_buf_alloc(ma[0], 0); imgp->image_header = (char *)sf_buf_kva(imgp->firstpage); return (0); } void exec_unmap_first_page(imgp) struct image_params *imgp; { vm_page_t m; if (imgp->firstpage != NULL) { m = sf_buf_page(imgp->firstpage); sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; vm_page_lock(m); vm_page_unhold(m); vm_page_unlock(m); } } /* * Destroy old address space, and allocate a new stack * The new stack is only SGROWSIZ large because it is grown * automatically in trap.c. */ int exec_new_vmspace(imgp, sv) struct image_params *imgp; struct sysentvec *sv; { int error; struct proc *p = imgp->proc; struct vmspace *vmspace = p->p_vmspace; vm_object_t obj; struct rlimit rlim_stack; vm_offset_t sv_minuser, stack_addr; vm_map_t map; u_long ssiz; imgp->vmspace_destroyed = 1; imgp->sysent = sv; /* May be called with Giant held */ EVENTHANDLER_INVOKE(process_exec, p, imgp); /* * Blow away entire process VM, if address space not shared, * otherwise, create a new VM space so that other threads are * not disrupted */ map = &vmspace->vm_map; if (map_at_zero) sv_minuser = sv->sv_minuser; else sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE); if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser && vm_map_max(map) == sv->sv_maxuser) { shmexit(vmspace); pmap_remove_pages(vmspace_pmap(vmspace)); vm_map_remove(map, vm_map_min(map), vm_map_max(map)); } else { error = vmspace_exec(p, sv_minuser, sv->sv_maxuser); if (error) return (error); vmspace = p->p_vmspace; map = &vmspace->vm_map; } /* Map a shared page */ obj = sv->sv_shared_page_obj; if (obj != NULL) { vm_object_reference(obj); error = vm_map_fixed(map, obj, 0, sv->sv_shared_page_base, sv->sv_shared_page_len, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_READ | VM_PROT_EXECUTE, MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE); if (error) { vm_object_deallocate(obj); return (error); } } /* Allocate a new stack */ if (imgp->stack_sz != 0) { ssiz = trunc_page(imgp->stack_sz); PROC_LOCK(p); lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack); PROC_UNLOCK(p); if (ssiz > rlim_stack.rlim_max) ssiz = rlim_stack.rlim_max; if (ssiz > rlim_stack.rlim_cur) { rlim_stack.rlim_cur = ssiz; kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack); } } else if (sv->sv_maxssiz != NULL) { ssiz = *sv->sv_maxssiz; } else { ssiz = maxssiz; } stack_addr = sv->sv_usrstack - ssiz; error = vm_map_stack(map, stack_addr, (vm_size_t)ssiz, obj != NULL && imgp->stack_prot != 0 ? imgp->stack_prot : sv->sv_stackprot, VM_PROT_ALL, MAP_STACK_GROWS_DOWN); if (error) return (error); /* * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they * are still used to enforce the stack rlimit on the process stack. */ vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT; vmspace->vm_maxsaddr = (char *)stack_addr; return (0); } /* * Copy out argument and environment strings from the old process address * space into the temporary string buffer. */ int exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, char **argv, char **envv) { u_long argp, envp; int error; size_t length; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ for (;;) { error = fueword(argv++, &argp); if (error == -1) { error = EFAULT; goto err_exit; } if (argp == 0) break; error = copyinstr((void *)(uintptr_t)argp, args->endp, args->stringspace, &length); if (error != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { for (;;) { error = fueword(envv++, &envp); if (error == -1) { error = EFAULT; goto err_exit; } if (envp == 0) break; error = copyinstr((void *)(uintptr_t)envp, args->endp, args->stringspace, &length); if (error != 0) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } int exec_copyin_data_fds(struct thread *td, struct image_args *args, const void *data, size_t datalen, const int *fds, size_t fdslen) { struct filedesc *ofdp; const char *p; int *kfds; int error; memset(args, '\0', sizeof(*args)); ofdp = td->td_proc->p_fd; if (datalen >= ARG_MAX || fdslen > ofdp->fd_lastfile + 1) return (E2BIG); error = exec_alloc_args(args); if (error != 0) return (error); args->begin_argv = args->buf; args->stringspace = ARG_MAX; if (datalen > 0) { /* * Argument buffer has been provided. Copy it into the * kernel as a single string and add a terminating null * byte. */ error = copyin(data, args->begin_argv, datalen); if (error != 0) goto err_exit; args->begin_argv[datalen] = '\0'; args->endp = args->begin_argv + datalen + 1; args->stringspace -= datalen + 1; /* * Traditional argument counting. Count the number of * null bytes. */ for (p = args->begin_argv; p < args->endp; ++p) if (*p == '\0') ++args->argc; } else { /* No argument buffer provided. */ args->endp = args->begin_argv; } /* There are no environment variables. */ args->begin_envv = args->endp; /* Create new file descriptor table. */ kfds = malloc(fdslen * sizeof(int), M_TEMP, M_WAITOK); error = copyin(fds, kfds, fdslen * sizeof(int)); if (error != 0) { free(kfds, M_TEMP); goto err_exit; } error = fdcopy_remapped(ofdp, kfds, fdslen, &args->fdp); free(kfds, M_TEMP); if (error != 0) goto err_exit; return (0); err_exit: exec_free_args(args); return (error); } /* * Allocate temporary demand-paged, zero-filled memory for the file name, * argument, and environment strings. Returns zero if the allocation succeeds * and ENOMEM otherwise. */ int exec_alloc_args(struct image_args *args) { args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX); return (args->buf != NULL ? 0 : ENOMEM); } void exec_free_args(struct image_args *args) { if (args->buf != NULL) { kmap_free_wakeup(exec_map, (vm_offset_t)args->buf, PATH_MAX + ARG_MAX); args->buf = NULL; } if (args->fname_buf != NULL) { free(args->fname_buf, M_TEMP); args->fname_buf = NULL; } if (args->fdp != NULL) fdescfree_remapped(args->fdp); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ register_t * exec_copyout_strings(imgp) struct image_params *imgp; { int argc, envc; char **vectp; char *stringp; uintptr_t destp; register_t *stack_base; struct ps_strings *arginfo; struct proc *p; size_t execpath_len; int szsigcode, szps; char canary[sizeof(long) * 8]; szps = sizeof(pagesizes[0]) * MAXPAGESIZES; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; szsigcode = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; if (p->p_sysent->sv_sigcode_base == 0) { if (p->p_sysent->sv_szsigcode != NULL) szsigcode = *(p->p_sysent->sv_szsigcode); } destp = (uintptr_t)arginfo; /* * install sigcode */ if (szsigcode != 0) { destp -= szsigcode; destp = rounddown2(destp, sizeof(void *)); copyout(p->p_sysent->sv_sigcode, (void *)destp, szsigcode); } /* * Copy the image path for the rtld. */ if (execpath_len != 0) { destp -= execpath_len; imgp->execpathp = destp; copyout(imgp->execpath, (void *)destp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = destp; copyout(canary, (void *)destp, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ destp -= szps; destp = rounddown2(destp, sizeof(void *)); imgp->pagesizes = destp; copyout(pagesizes, (void *)destp, szps); imgp->pagesizeslen = szps; destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(void *)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(char *)); } /* * vectp also becomes our initial stack base */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, (void *)destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } /* * Check permissions of file to execute. * Called with imgp->vp locked. * Return 0 for success or error code on failure. */ int exec_check_permissions(imgp) struct image_params *imgp; { struct vnode *vp = imgp->vp; struct vattr *attr = imgp->attr; struct thread *td; int error, writecount; td = curthread; /* Get file attributes */ error = VOP_GETATTR(vp, attr, td->td_ucred); if (error) return (error); #ifdef MAC error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp); if (error) return (error); #endif /* * 1) Check if file execution is disabled for the filesystem that * this file resides on. * 2) Ensure that at least one execute bit is on. Otherwise, a * privileged user will always succeed, and we don't want this * to happen unless the file really is executable. * 3) Ensure that the file is a regular file. */ if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 || (attr->va_type != VREG)) return (EACCES); /* * Zero length files can't be exec'd */ if (attr->va_size == 0) return (ENOEXEC); /* * Check for execute permission to file based on current credentials. */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) return (error); /* * Check number of open-for-writes on the file and deny execution * if there are any. */ error = VOP_GET_WRITECOUNT(vp, &writecount); if (error != 0) return (error); if (writecount != 0) return (ETXTBSY); /* * Call filesystem specific open routine (which does nothing in the * general case). */ error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error == 0) imgp->opened = 1; return (error); } /* * Exec handler registration */ int exec_register(execsw_arg) const struct execsw *execsw_arg; { const struct execsw **es, **xs, **newexecsw; int count = 2; /* New slot and trailing NULL */ if (execsw) for (es = execsw; *es; es++) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; if (execsw) for (es = execsw; *es; es++) *xs++ = *es; *xs++ = execsw_arg; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } int exec_unregister(execsw_arg) const struct execsw *execsw_arg; { const struct execsw **es, **xs, **newexecsw; int count = 1; if (execsw == NULL) panic("unregister with no handlers left?\n"); for (es = execsw; *es; es++) { if (*es == execsw_arg) break; } if (*es == NULL) return (ENOENT); for (es = execsw; *es; es++) if (*es != execsw_arg) count++; newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK); xs = newexecsw; for (es = execsw; *es; es++) if (*es != execsw_arg) *xs++ = *es; *xs = NULL; if (execsw) free(execsw, M_TEMP); execsw = newexecsw; return (0); } Index: projects/vnet/sys/kern/kern_exit.c =================================================================== --- projects/vnet/sys/kern/kern_exit.c (revision 302276) +++ projects/vnet/sys/kern/kern_exit.c (revision 302277) @@ -1,1329 +1,1321 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for acct_process() function prototype */ #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_execexit_func_t dtrace_fasttrap_exit; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE1(proc, , , exit, "int"); /* Hook for NFS teardown procedure. */ void (*nlminfo_release_p)(struct proc *p); struct proc * proc_realparent(struct proc *child) { struct proc *p, *parent; sx_assert(&proctree_lock, SX_LOCKED); if ((child->p_treeflag & P_TREE_ORPHANED) == 0) { if (child->p_oppid == 0 || child->p_pptr->p_pid == child->p_oppid) parent = child->p_pptr; else parent = initproc; return (parent); } for (p = child; (p->p_treeflag & P_TREE_FIRST_ORPHAN) == 0;) { /* Cannot use LIST_PREV(), since the list head is not known. */ p = __containerof(p->p_orphan.le_prev, struct proc, p_orphan.le_next); KASSERT((p->p_treeflag & P_TREE_ORPHANED) != 0, ("missing P_ORPHAN %p", p)); } parent = __containerof(p->p_orphan.le_prev, struct proc, p_orphans.lh_first); return (parent); } void reaper_abandon_children(struct proc *p, bool exiting) { struct proc *p1, *p2, *ptmp; sx_assert(&proctree_lock, SX_LOCKED); KASSERT(p != initproc, ("reaper_abandon_children for initproc")); if ((p->p_treeflag & P_TREE_REAPER) == 0) return; p1 = p->p_reaper; LIST_FOREACH_SAFE(p2, &p->p_reaplist, p_reapsibling, ptmp) { LIST_REMOVE(p2, p_reapsibling); p2->p_reaper = p1; p2->p_reapsubtree = p->p_reapsubtree; LIST_INSERT_HEAD(&p1->p_reaplist, p2, p_reapsibling); if (exiting && p2->p_pptr == p) { PROC_LOCK(p2); proc_reparent(p2, p1); PROC_UNLOCK(p2); } } KASSERT(LIST_EMPTY(&p->p_reaplist), ("p_reaplist not empty")); p->p_treeflag &= ~P_TREE_REAPER; } static void clear_orphan(struct proc *p) { struct proc *p1; sx_assert(&proctree_lock, SA_XLOCKED); if ((p->p_treeflag & P_TREE_ORPHANED) == 0) return; if ((p->p_treeflag & P_TREE_FIRST_ORPHAN) != 0) { p1 = LIST_NEXT(p, p_orphan); if (p1 != NULL) p1->p_treeflag |= P_TREE_FIRST_ORPHAN; p->p_treeflag &= ~P_TREE_FIRST_ORPHAN; } LIST_REMOVE(p, p_orphan); p->p_treeflag &= ~P_TREE_ORPHANED; } /* * exit -- death of process. */ void sys_sys_exit(struct thread *td, struct sys_exit_args *uap) { exit1(td, uap->rval, 0); /* NOTREACHED */ } /* * Exit: deallocate address space and other resources, change proc state to * zombie, and unlink proc from allproc and parent's lists. Save exit status * and rusage for wait(). Check for child processes and orphan them. */ void exit1(struct thread *td, int rval, int signo) { struct proc *p, *nq, *q, *t; struct thread *tdt; mtx_assert(&Giant, MA_NOTOWNED); KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo)); p = td->td_proc; /* * XXX in case we're rebooting we just let init die in order to * work around an unsolved stack overflow seen very late during * shutdown on sparc64 when the gmirror worker process exists. */ if (p == initproc && rebooting == 0) { printf("init died (signal %d, exit %d)\n", signo, rval); panic("Going nowhere without my init!"); } /* * Deref SU mp, since the thread does not return to userspace. */ if (softdep_ast_cleanup != NULL) softdep_ast_cleanup(); /* * MUST abort all other threads before proceeding past here. */ PROC_LOCK(p); /* * First check if some other thread or external request got * here before us. If so, act appropriately: exit or suspend. * We must ensure that stop requests are handled before we set * P_WEXIT. */ thread_suspend_check(0); while (p->p_flag & P_HADTHREADS) { /* * Kill off the other threads. This requires * some co-operation from other parts of the kernel * so it may not be instantaneous. With this state set * any thread entering the kernel from userspace will * thread_exit() in trap(). Any thread attempting to * sleep will return immediately with EINTR or EWOULDBLOCK * which will hopefully force them to back out to userland * freeing resources as they go. Any thread attempting * to return to userland will thread_exit() from userret(). * thread_exit() will unsuspend us when the last of the * other threads exits. * If there is already a thread singler after resumption, * calling thread_single will fail; in that case, we just * re-check all suspension request, the thread should * either be suspended there or exit. */ if (!thread_single(p, SINGLE_EXIT)) /* * All other activity in this process is now * stopped. Threading support has been turned * off. */ break; /* * Recheck for new stop or suspend requests which * might appear while process lock was dropped in * thread_single(). */ thread_suspend_check(0); } KASSERT(p->p_numthreads == 1, ("exit1: proc %p exiting with %d threads", p, p->p_numthreads)); racct_sub(p, RACCT_NTHR, 1); /* Let event handler change exit status */ p->p_xexit = rval; p->p_xsig = signo; /* * Wakeup anyone in procfs' PIOCWAIT. They should have a hold * on our vmspace, so we should block below until they have * released their reference to us. Note that if they have * requested S_EXIT stops we will block here until they ack * via PIOCCONT. */ _STOPEVENT(p, S_EXIT, 0); /* * Ignore any pending request to stop due to a stop signal. * Once P_WEXIT is set, future requests will be ignored as * well. */ p->p_flag &= ~P_STOPPED_SIG; KASSERT(!P_SHOULDSTOP(p), ("exiting process is stopped")); /* * Note that we are exiting and do another wakeup of anyone in * PIOCWAIT in case they aren't listening for S_EXIT stops or * decided to wait again after we told them we are exiting. */ p->p_flag |= P_WEXIT; wakeup(&p->p_stype); /* * Wait for any processes that have a hold on our vmspace to * release their reference. */ while (p->p_lock > 0) msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0); PROC_UNLOCK(p); /* Drain the limit callout while we don't have the proc locked */ callout_drain(&p->p_limco); #ifdef AUDIT /* * The Sun BSM exit token contains two components: an exit status as * passed to exit(), and a return value to indicate what sort of exit * it was. The exit status is WEXITSTATUS(rv), but it's not clear * what the return value is. */ AUDIT_ARG_EXIT(rval, 0); AUDIT_SYSCALL_EXIT(0, td); #endif /* Are we a task leader with peers? */ if (p->p_peers != NULL && p == p->p_leader) { mtx_lock(&ppeers_lock); q = p->p_peers; while (q != NULL) { PROC_LOCK(q); kern_psignal(q, SIGKILL); PROC_UNLOCK(q); q = q->p_peers; } while (p->p_peers != NULL) msleep(p, &ppeers_lock, PWAIT, "exit1", 0); mtx_unlock(&ppeers_lock); } /* * Check if any loadable modules need anything done at process exit. * E.g. SYSV IPC stuff. * Event handler could change exit status. * XXX what if one of these generates an error? */ EVENTHANDLER_INVOKE(process_exit, p); /* * If parent is waiting for us to exit or exec, * P_PPWAIT is set; we will wakeup the parent below. */ PROC_LOCK(p); stopprofclock(p); p->p_flag &= ~(P_TRACED | P_PPWAIT | P_PPTRACE); /* * Stop the real interval timer. If the handler is currently * executing, prevent it from rearming itself and let it finish. */ if (timevalisset(&p->p_realtimer.it_value) && callout_stop(&p->p_itcallout) == 0) { timevalclear(&p->p_realtimer.it_interval); msleep(&p->p_itcallout, &p->p_mtx, PWAIT, "ritwait", 0); KASSERT(!timevalisset(&p->p_realtimer.it_value), ("realtime timer is still armed")); } PROC_UNLOCK(p); umtx_thread_exit(td); /* * Reset any sigio structures pointing to us as a result of * F_SETOWN with our pid. */ funsetownlst(&p->p_sigiolst); /* * If this process has an nlminfo data area (for lockd), release it */ if (nlminfo_release_p != NULL && p->p_nlminfo != NULL) (*nlminfo_release_p)(p); /* * Close open files and release open-file table. * This may block! */ fdescfree(td); /* * If this thread tickled GEOM, we need to wait for the giggling to * stop before we return to userland */ if (td->td_pflags & TDP_GEOM) g_waitidle(); /* * Remove ourself from our leader's peer list and wake our leader. */ if (p->p_leader->p_peers != NULL) { mtx_lock(&ppeers_lock); if (p->p_leader->p_peers != NULL) { q = p->p_leader; while (q->p_peers != p) q = q->p_peers; q->p_peers = p->p_peers; wakeup(p->p_leader); } mtx_unlock(&ppeers_lock); } vmspace_exit(td); killjobc(); (void)acct_process(td); #ifdef KTRACE ktrprocexit(td); #endif /* * Release reference to text vnode */ if (p->p_textvp != NULL) { vrele(p->p_textvp); p->p_textvp = NULL; } /* * Release our limits structure. */ lim_free(p->p_limit); p->p_limit = NULL; tidhash_remove(td); /* * Remove proc from allproc queue and pidhash chain. * Place onto zombproc. Unlink from parent's child list. */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); LIST_INSERT_HEAD(&zombproc, p, p_list); LIST_REMOVE(p, p_hash); sx_xunlock(&allproc_lock); /* * Call machine-dependent code to release any * machine-dependent resources other than the address space. * The address space is released by "vmspace_exitfree(p)" in * vm_waitproc(). */ cpu_exit(td); WITNESS_WARN(WARN_PANIC, NULL, "process (pid %d) exiting", p->p_pid); /* * Reparent all children processes: * - traced ones to the original parent (or init if we are that parent) * - the rest to init */ sx_xlock(&proctree_lock); q = LIST_FIRST(&p->p_children); if (q != NULL) /* only need this if any child is S_ZOMB */ wakeup(q->p_reaper); for (; q != NULL; q = nq) { nq = LIST_NEXT(q, p_sibling); PROC_LOCK(q); q->p_sigparent = SIGCHLD; if (!(q->p_flag & P_TRACED)) { proc_reparent(q, q->p_reaper); } else { /* * Traced processes are killed since their existence * means someone is screwing up. */ t = proc_realparent(q); if (t == p) { proc_reparent(q, q->p_reaper); } else { PROC_LOCK(t); proc_reparent(q, t); PROC_UNLOCK(t); } /* * Since q was found on our children list, the * proc_reparent() call moved q to the orphan * list due to present P_TRACED flag. Clear * orphan link for q now while q is locked. */ clear_orphan(q); q->p_flag &= ~(P_TRACED | P_STOPPED_TRACE); FOREACH_THREAD_IN_PROC(q, tdt) tdt->td_dbgflags &= ~TDB_SUSPEND; kern_psignal(q, SIGKILL); } PROC_UNLOCK(q); } /* * Also get rid of our orphans. */ while ((q = LIST_FIRST(&p->p_orphans)) != NULL) { PROC_LOCK(q); CTR2(KTR_PTRACE, "exit: pid %d, clearing orphan %d", p->p_pid, q->p_pid); clear_orphan(q); PROC_UNLOCK(q); } /* Save exit status. */ PROC_LOCK(p); p->p_xthread = td; /* Tell the prison that we are gone. */ prison_proc_free(p->p_ucred->cr_prison); #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the exit if it * has declared an interest. */ if (dtrace_fasttrap_exit) dtrace_fasttrap_exit(p); #endif /* * Notify interested parties of our demise. */ - KNOTE_LOCKED(&p->p_klist, NOTE_EXIT); + KNOTE_LOCKED(p->p_klist, NOTE_EXIT); #ifdef KDTRACE_HOOKS int reason = CLD_EXITED; if (WCOREDUMP(signo)) reason = CLD_DUMPED; else if (WIFSIGNALED(signo)) reason = CLD_KILLED; SDT_PROBE1(proc, , , exit, reason); #endif /* - * Just delete all entries in the p_klist. At this point we won't - * report any more events, and there are nasty race conditions that - * can beat us if we don't. - */ - knlist_clear(&p->p_klist, 1); - - /* * If this is a process with a descriptor, we may not need to deliver * a signal to the parent. proctree_lock is held over * procdesc_exit() to serialize concurrent calls to close() and * exit(). */ if (p->p_procdesc == NULL || procdesc_exit(p)) { /* * Notify parent that we're gone. If parent has the * PS_NOCLDWAIT flag set, or if the handler is set to SIG_IGN, * notify process 1 instead (and hope it will handle this * situation). */ PROC_LOCK(p->p_pptr); mtx_lock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr->p_sigacts->ps_flag & (PS_NOCLDWAIT | PS_CLDSIGIGN)) { struct proc *pp; mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); pp = p->p_pptr; PROC_UNLOCK(pp); proc_reparent(p, p->p_reaper); p->p_sigparent = SIGCHLD; PROC_LOCK(p->p_pptr); /* * Notify parent, so in case he was wait(2)ing or * executing waitpid(2) with our pid, he will * continue. */ wakeup(pp); } else mtx_unlock(&p->p_pptr->p_sigacts->ps_mtx); if (p->p_pptr == p->p_reaper || p->p_pptr == initproc) childproc_exited(p); else if (p->p_sigparent != 0) { if (p->p_sigparent == SIGCHLD) childproc_exited(p); else /* LINUX thread */ kern_psignal(p->p_pptr, p->p_sigparent); } } else PROC_LOCK(p->p_pptr); sx_xunlock(&proctree_lock); /* * The state PRS_ZOMBIE prevents other proesses from sending * signal to the process, to avoid memory leak, we free memory * for signal queue at the time when the state is set. */ sigqueue_flush(&p->p_sigqueue); sigqueue_flush(&td->td_sigqueue); /* * We have to wait until after acquiring all locks before * changing p_state. We need to avoid all possible context * switches (including ones from blocking on a mutex) while * marked as a zombie. We also have to set the zombie state * before we release the parent process' proc lock to avoid * a lost wakeup. So, we first call wakeup, then we grab the * sched lock, update the state, and release the parent process' * proc lock. */ wakeup(p->p_pptr); cv_broadcast(&p->p_pwait); sched_exit(p->p_pptr, td); PROC_SLOCK(p); p->p_state = PRS_ZOMBIE; PROC_UNLOCK(p->p_pptr); /* - * Hopefully no one will try to deliver a signal to the process this - * late in the game. - */ - knlist_destroy(&p->p_klist); - - /* * Save our children's rusage information in our exit rusage. */ PROC_STATLOCK(p); ruadd(&p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux); PROC_STATUNLOCK(p); /* * Make sure the scheduler takes this thread out of its tables etc. * This will also release this thread's reference to the ucred. * Other thread parts to release include pcb bits and such. */ thread_exit(); } #ifndef _SYS_SYSPROTO_H_ struct abort2_args { char *why; int nargs; void **args; }; #endif int sys_abort2(struct thread *td, struct abort2_args *uap) { struct proc *p = td->td_proc; struct sbuf *sb; void *uargs[16]; int error, i, sig; /* * Do it right now so we can log either proper call of abort2(), or * note, that invalid argument was passed. 512 is big enough to * handle 16 arguments' descriptions with additional comments. */ sb = sbuf_new(NULL, NULL, 512, SBUF_FIXEDLEN); sbuf_clear(sb); sbuf_printf(sb, "%s(pid %d uid %d) aborted: ", p->p_comm, p->p_pid, td->td_ucred->cr_uid); /* * Since we can't return from abort2(), send SIGKILL in cases, where * abort2() was called improperly */ sig = SIGKILL; /* Prevent from DoSes from user-space. */ if (uap->nargs < 0 || uap->nargs > 16) goto out; if (uap->nargs > 0) { if (uap->args == NULL) goto out; error = copyin(uap->args, uargs, uap->nargs * sizeof(void *)); if (error != 0) goto out; } /* * Limit size of 'reason' string to 128. Will fit even when * maximal number of arguments was chosen to be logged. */ if (uap->why != NULL) { error = sbuf_copyin(sb, uap->why, 128); if (error < 0) goto out; } else { sbuf_printf(sb, "(null)"); } if (uap->nargs > 0) { sbuf_printf(sb, "("); for (i = 0;i < uap->nargs; i++) sbuf_printf(sb, "%s%p", i == 0 ? "" : ", ", uargs[i]); sbuf_printf(sb, ")"); } /* * Final stage: arguments were proper, string has been * successfully copied from userspace, and copying pointers * from user-space succeed. */ sig = SIGABRT; out: if (sig == SIGKILL) { sbuf_trim(sb); sbuf_printf(sb, " (Reason text inaccessible)"); } sbuf_cat(sb, "\n"); sbuf_finish(sb); log(LOG_INFO, "%s", sbuf_data(sb)); sbuf_delete(sb); exit1(td, 0, sig); return (0); } #ifdef COMPAT_43 /* * The dirty work is handled by kern_wait(). */ int owait(struct thread *td, struct owait_args *uap __unused) { int error, status; error = kern_wait(td, WAIT_ANY, &status, 0, NULL); if (error == 0) td->td_retval[1] = status; return (error); } #endif /* COMPAT_43 */ /* * The dirty work is handled by kern_wait(). */ int sys_wait4(struct thread *td, struct wait4_args *uap) { struct rusage ru, *rup; int error, status; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (uap->status != NULL && error == 0) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) error = copyout(&ru, uap->rusage, sizeof(struct rusage)); return (error); } int sys_wait6(struct thread *td, struct wait6_args *uap) { struct __wrusage wru, *wrup; siginfo_t si, *sip; idtype_t idtype; id_t id; int error, status; idtype = uap->idtype; id = uap->id; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; /* * We expect all callers of wait6() to know about WEXITED and * WTRAPPED. */ error = kern_wait6(td, idtype, id, &status, uap->options, wrup, sip); if (uap->status != NULL && error == 0) error = copyout(&status, uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0) error = copyout(&wru, uap->wrusage, sizeof(wru)); if (uap->info != NULL && error == 0) error = copyout(&si, uap->info, sizeof(si)); return (error); } /* * Reap the remains of a zombie process and optionally return status and * rusage. Asserts and will release both the proctree_lock and the process * lock as part of its work. */ void proc_reap(struct thread *td, struct proc *p, int *status, int options) { struct proc *q, *t; sx_assert(&proctree_lock, SA_XLOCKED); PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); KASSERT(p->p_state == PRS_ZOMBIE, ("proc_reap: !PRS_ZOMBIE")); q = td->td_proc; PROC_SUNLOCK(p); if (status) *status = KW_EXITCODE(p->p_xexit, p->p_xsig); if (options & WNOWAIT) { /* * Only poll, returning the status. Caller does not wish to * release the proc struct just yet. */ PROC_UNLOCK(p); sx_xunlock(&proctree_lock); return; } PROC_LOCK(q); sigqueue_take(p->p_ksi); PROC_UNLOCK(q); /* * If we got the child via a ptrace 'attach', we need to give it back * to the old parent. */ if (p->p_oppid != 0 && p->p_oppid != p->p_pptr->p_pid) { PROC_UNLOCK(p); t = proc_realparent(p); PROC_LOCK(t); PROC_LOCK(p); CTR2(KTR_PTRACE, "wait: traced child %d moved back to parent %d", p->p_pid, t->p_pid); proc_reparent(p, t); p->p_oppid = 0; PROC_UNLOCK(p); pksignal(t, SIGCHLD, p->p_ksi); wakeup(t); cv_broadcast(&p->p_pwait); PROC_UNLOCK(t); sx_xunlock(&proctree_lock); return; } p->p_oppid = 0; PROC_UNLOCK(p); /* * Remove other references to this process to ensure we have an * exclusive reference. */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); /* off zombproc */ sx_xunlock(&allproc_lock); LIST_REMOVE(p, p_sibling); reaper_abandon_children(p, true); LIST_REMOVE(p, p_reapsibling); PROC_LOCK(p); clear_orphan(p); PROC_UNLOCK(p); leavepgrp(p); if (p->p_procdesc != NULL) procdesc_reap(p); sx_xunlock(&proctree_lock); + + PROC_LOCK(p); + knlist_detach(p->p_klist); + p->p_klist = NULL; + PROC_UNLOCK(p); /* * Removal from allproc list and process group list paired with * PROC_LOCK which was executed during that time should guarantee * nothing can reach this process anymore. As such further locking * is unnecessary. */ p->p_xexit = p->p_xsig = 0; /* XXX: why? */ PROC_LOCK(q); ruadd(&q->p_stats->p_cru, &q->p_crux, &p->p_ru, &p->p_rux); PROC_UNLOCK(q); /* * Decrement the count of procs running with this uid. */ (void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0); /* * Destroy resource accounting information associated with the process. */ #ifdef RACCT if (racct_enable) { PROC_LOCK(p); racct_sub(p, RACCT_NPROC, 1); PROC_UNLOCK(p); } #endif racct_proc_exit(p); /* * Free credentials, arguments, and sigacts. */ crfree(p->p_ucred); proc_set_cred(p, NULL); pargs_drop(p->p_args); p->p_args = NULL; sigacts_free(p->p_sigacts); p->p_sigacts = NULL; /* * Do any thread-system specific cleanups. */ thread_wait(p); /* * Give vm and machine-dependent layer a chance to free anything that * cpu_exit couldn't release while still running in process context. */ vm_waitproc(p); #ifdef MAC mac_proc_destroy(p); #endif /* * Free any domain policy that's still hiding around. */ vm_domain_policy_cleanup(&p->p_vm_dom_policy); KASSERT(FIRST_THREAD_IN_PROC(p), ("proc_reap: no residual thread!")); uma_zfree(proc_zone, p); atomic_add_int(&nprocs, -1); } static int proc_to_reap(struct thread *td, struct proc *p, idtype_t idtype, id_t id, int *status, int options, struct __wrusage *wrusage, siginfo_t *siginfo, int check_only) { struct rusage *rup; sx_assert(&proctree_lock, SA_XLOCKED); PROC_LOCK(p); switch (idtype) { case P_ALL: if (p->p_procdesc != NULL) { PROC_UNLOCK(p); return (0); } break; case P_PID: if (p->p_pid != (pid_t)id) { PROC_UNLOCK(p); return (0); } break; case P_PGID: if (p->p_pgid != (pid_t)id) { PROC_UNLOCK(p); return (0); } break; case P_SID: if (p->p_session->s_sid != (pid_t)id) { PROC_UNLOCK(p); return (0); } break; case P_UID: if (p->p_ucred->cr_uid != (uid_t)id) { PROC_UNLOCK(p); return (0); } break; case P_GID: if (p->p_ucred->cr_gid != (gid_t)id) { PROC_UNLOCK(p); return (0); } break; case P_JAILID: if (p->p_ucred->cr_prison->pr_id != (int)id) { PROC_UNLOCK(p); return (0); } break; /* * It seems that the thread structures get zeroed out * at process exit. This makes it impossible to * support P_SETID, P_CID or P_CPUID. */ default: PROC_UNLOCK(p); return (0); } if (p_canwait(td, p)) { PROC_UNLOCK(p); return (0); } if (((options & WEXITED) == 0) && (p->p_state == PRS_ZOMBIE)) { PROC_UNLOCK(p); return (0); } /* * This special case handles a kthread spawned by linux_clone * (see linux_misc.c). The linux_wait4 and linux_waitpid * functions need to be able to distinguish between waiting * on a process and waiting on a thread. It is a thread if * p_sigparent is not SIGCHLD, and the WLINUXCLONE option * signifies we want to wait for threads and not processes. */ if ((p->p_sigparent != SIGCHLD) ^ ((options & WLINUXCLONE) != 0)) { PROC_UNLOCK(p); return (0); } if (siginfo != NULL) { bzero(siginfo, sizeof(*siginfo)); siginfo->si_errno = 0; /* * SUSv4 requires that the si_signo value is always * SIGCHLD. Obey it despite the rfork(2) interface * allows to request other signal for child exit * notification. */ siginfo->si_signo = SIGCHLD; /* * This is still a rough estimate. We will fix the * cases TRAPPED, STOPPED, and CONTINUED later. */ if (WCOREDUMP(p->p_xsig)) { siginfo->si_code = CLD_DUMPED; siginfo->si_status = WTERMSIG(p->p_xsig); } else if (WIFSIGNALED(p->p_xsig)) { siginfo->si_code = CLD_KILLED; siginfo->si_status = WTERMSIG(p->p_xsig); } else { siginfo->si_code = CLD_EXITED; siginfo->si_status = p->p_xexit; } siginfo->si_pid = p->p_pid; siginfo->si_uid = p->p_ucred->cr_uid; /* * The si_addr field would be useful additional * detail, but apparently the PC value may be lost * when we reach this point. bzero() above sets * siginfo->si_addr to NULL. */ } /* * There should be no reason to limit resources usage info to * exited processes only. A snapshot about any resources used * by a stopped process may be exactly what is needed. */ if (wrusage != NULL) { rup = &wrusage->wru_self; *rup = p->p_ru; PROC_STATLOCK(p); calcru(p, &rup->ru_utime, &rup->ru_stime); PROC_STATUNLOCK(p); rup = &wrusage->wru_children; *rup = p->p_stats->p_cru; calccru(p, &rup->ru_utime, &rup->ru_stime); } if (p->p_state == PRS_ZOMBIE && !check_only) { PROC_SLOCK(p); proc_reap(td, p, status, options); return (-1); } PROC_UNLOCK(p); return (1); } int kern_wait(struct thread *td, pid_t pid, int *status, int options, struct rusage *rusage) { struct __wrusage wru, *wrup; idtype_t idtype; id_t id; int ret; /* * Translate the special pid values into the (idtype, pid) * pair for kern_wait6. The WAIT_MYPGRP case is handled by * kern_wait6() on its own. */ if (pid == WAIT_ANY) { idtype = P_ALL; id = 0; } else if (pid < 0) { idtype = P_PGID; id = (id_t)-pid; } else { idtype = P_PID; id = (id_t)pid; } if (rusage != NULL) wrup = &wru; else wrup = NULL; /* * For backward compatibility we implicitly add flags WEXITED * and WTRAPPED here. */ options |= WEXITED | WTRAPPED; ret = kern_wait6(td, idtype, id, status, options, wrup, NULL); if (rusage != NULL) *rusage = wru.wru_self; return (ret); } int kern_wait6(struct thread *td, idtype_t idtype, id_t id, int *status, int options, struct __wrusage *wrusage, siginfo_t *siginfo) { struct proc *p, *q; pid_t pid; int error, nfound, ret; AUDIT_ARG_VALUE((int)idtype); /* XXX - This is likely wrong! */ AUDIT_ARG_PID((pid_t)id); /* XXX - This may be wrong! */ AUDIT_ARG_VALUE(options); q = td->td_proc; if ((pid_t)id == WAIT_MYPGRP && (idtype == P_PID || idtype == P_PGID)) { PROC_LOCK(q); id = (id_t)q->p_pgid; PROC_UNLOCK(q); idtype = P_PGID; } /* If we don't know the option, just return. */ if ((options & ~(WUNTRACED | WNOHANG | WCONTINUED | WNOWAIT | WEXITED | WTRAPPED | WLINUXCLONE)) != 0) return (EINVAL); if ((options & (WEXITED | WUNTRACED | WCONTINUED | WTRAPPED)) == 0) { /* * We will be unable to find any matching processes, * because there are no known events to look for. * Prefer to return error instead of blocking * indefinitely. */ return (EINVAL); } loop: if (q->p_flag & P_STATCHILD) { PROC_LOCK(q); q->p_flag &= ~P_STATCHILD; PROC_UNLOCK(q); } nfound = 0; sx_xlock(&proctree_lock); LIST_FOREACH(p, &q->p_children, p_sibling) { pid = p->p_pid; ret = proc_to_reap(td, p, idtype, id, status, options, wrusage, siginfo, 0); if (ret == 0) continue; else if (ret == 1) nfound++; else { td->td_retval[0] = pid; return (0); } PROC_LOCK(p); PROC_SLOCK(p); if ((options & WTRAPPED) != 0 && (p->p_flag & P_TRACED) != 0 && (p->p_flag & (P_STOPPED_TRACE | P_STOPPED_SIG)) != 0 && (p->p_suspcount == p->p_numthreads) && ((p->p_flag & P_WAITED) == 0)) { PROC_SUNLOCK(p); if ((options & WNOWAIT) == 0) p->p_flag |= P_WAITED; sx_xunlock(&proctree_lock); if (status != NULL) *status = W_STOPCODE(p->p_xsig); if (siginfo != NULL) { siginfo->si_status = p->p_xsig; siginfo->si_code = CLD_TRAPPED; } if ((options & WNOWAIT) == 0) { PROC_LOCK(q); sigqueue_take(p->p_ksi); PROC_UNLOCK(q); } CTR4(KTR_PTRACE, "wait: returning trapped pid %d status %#x (xstat %d) xthread %d", p->p_pid, W_STOPCODE(p->p_xsig), p->p_xsig, p->p_xthread != NULL ? p->p_xthread->td_tid : -1); PROC_UNLOCK(p); td->td_retval[0] = pid; return (0); } if ((options & WUNTRACED) != 0 && (p->p_flag & P_STOPPED_SIG) != 0 && (p->p_suspcount == p->p_numthreads) && ((p->p_flag & P_WAITED) == 0)) { PROC_SUNLOCK(p); if ((options & WNOWAIT) == 0) p->p_flag |= P_WAITED; sx_xunlock(&proctree_lock); if (status != NULL) *status = W_STOPCODE(p->p_xsig); if (siginfo != NULL) { siginfo->si_status = p->p_xsig; siginfo->si_code = CLD_STOPPED; } if ((options & WNOWAIT) == 0) { PROC_LOCK(q); sigqueue_take(p->p_ksi); PROC_UNLOCK(q); } PROC_UNLOCK(p); td->td_retval[0] = pid; return (0); } PROC_SUNLOCK(p); if ((options & WCONTINUED) != 0 && (p->p_flag & P_CONTINUED) != 0) { sx_xunlock(&proctree_lock); if ((options & WNOWAIT) == 0) { p->p_flag &= ~P_CONTINUED; PROC_LOCK(q); sigqueue_take(p->p_ksi); PROC_UNLOCK(q); } PROC_UNLOCK(p); if (status != NULL) *status = SIGCONT; if (siginfo != NULL) { siginfo->si_status = SIGCONT; siginfo->si_code = CLD_CONTINUED; } td->td_retval[0] = pid; return (0); } PROC_UNLOCK(p); } /* * Look in the orphans list too, to allow the parent to * collect it's child exit status even if child is being * debugged. * * Debugger detaches from the parent upon successful * switch-over from parent to child. At this point due to * re-parenting the parent loses the child to debugger and a * wait4(2) call would report that it has no children to wait * for. By maintaining a list of orphans we allow the parent * to successfully wait until the child becomes a zombie. */ if (nfound == 0) { LIST_FOREACH(p, &q->p_orphans, p_orphan) { ret = proc_to_reap(td, p, idtype, id, NULL, options, NULL, NULL, 1); if (ret != 0) { KASSERT(ret != -1, ("reaped an orphan (pid %d)", (int)td->td_retval[0])); nfound++; break; } } } if (nfound == 0) { sx_xunlock(&proctree_lock); return (ECHILD); } if (options & WNOHANG) { sx_xunlock(&proctree_lock); td->td_retval[0] = 0; return (0); } PROC_LOCK(q); sx_xunlock(&proctree_lock); if (q->p_flag & P_STATCHILD) { q->p_flag &= ~P_STATCHILD; error = 0; } else error = msleep(q, &q->p_mtx, PWAIT | PCATCH, "wait", 0); PROC_UNLOCK(q); if (error) return (error); goto loop; } /* * Make process 'parent' the new parent of process 'child'. * Must be called with an exclusive hold of proctree lock. */ void proc_reparent(struct proc *child, struct proc *parent) { sx_assert(&proctree_lock, SX_XLOCKED); PROC_LOCK_ASSERT(child, MA_OWNED); if (child->p_pptr == parent) return; PROC_LOCK(child->p_pptr); sigqueue_take(child->p_ksi); PROC_UNLOCK(child->p_pptr); LIST_REMOVE(child, p_sibling); LIST_INSERT_HEAD(&parent->p_children, child, p_sibling); clear_orphan(child); if (child->p_flag & P_TRACED) { if (LIST_EMPTY(&child->p_pptr->p_orphans)) { child->p_treeflag |= P_TREE_FIRST_ORPHAN; LIST_INSERT_HEAD(&child->p_pptr->p_orphans, child, p_orphan); } else { LIST_INSERT_AFTER(LIST_FIRST(&child->p_pptr->p_orphans), child, p_orphan); } child->p_treeflag |= P_TREE_ORPHANED; } child->p_pptr = parent; } Index: projects/vnet/sys/kern/kern_fork.c =================================================================== --- projects/vnet/sys/kern/kern_fork.c (revision 302276) +++ projects/vnet/sys/kern/kern_fork.c (revision 302277) @@ -1,1118 +1,1118 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_fork.c 8.6 (Berkeley) 4/8/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include dtrace_fork_func_t dtrace_fasttrap_fork; #endif SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE3(proc, , , create, "struct proc *", "struct proc *", "int"); #ifndef _SYS_SYSPROTO_H_ struct fork_args { int dummy; }; #endif /* ARGSUSED */ int sys_fork(struct thread *td, struct fork_args *uap) { struct fork_req fr; int error, pid; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC; fr.fr_pidp = &pid; error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; } return (error); } /* ARGUSED */ int sys_pdfork(struct thread *td, struct pdfork_args *uap) { struct fork_req fr; int error, fd, pid; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFPROCDESC; fr.fr_pidp = &pid; fr.fr_pd_fd = &fd; fr.fr_pd_flags = uap->flags; /* * It is necessary to return fd by reference because 0 is a valid file * descriptor number, and the child needs to be able to distinguish * itself from the parent using the return value. */ error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; error = copyout(&fd, uap->fdp, sizeof(fd)); } return (error); } /* ARGSUSED */ int sys_vfork(struct thread *td, struct vfork_args *uap) { struct fork_req fr; int error, pid; bzero(&fr, sizeof(fr)); fr.fr_flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; fr.fr_pidp = &pid; error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; } return (error); } int sys_rfork(struct thread *td, struct rfork_args *uap) { struct fork_req fr; int error, pid; /* Don't allow kernel-only flags. */ if ((uap->flags & RFKERNELONLY) != 0) return (EINVAL); AUDIT_ARG_FFLAGS(uap->flags); bzero(&fr, sizeof(fr)); fr.fr_flags = uap->flags; fr.fr_pidp = &pid; error = fork1(td, &fr); if (error == 0) { td->td_retval[0] = pid; td->td_retval[1] = 0; } return (error); } int nprocs = 1; /* process 0 */ int lastpid = 0; SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0, "Last used PID"); /* * Random component to lastpid generation. We mix in a random factor to make * it a little harder to predict. We sanity check the modulus value to avoid * doing it in critical paths. Don't let it be too small or we pointlessly * waste randomness entropy, and don't let it be impossibly large. Using a * modulus that is too big causes a LOT more process table scans and slows * down fork processing as the pidchecked caching is defeated. */ static int randompid = 0; static int sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) { int error, pid; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error != 0) return(error); sx_xlock(&allproc_lock); pid = randompid; error = sysctl_handle_int(oidp, &pid, 0, req); if (error == 0 && req->newptr != NULL) { if (pid < 0 || pid > pid_max - 100) /* out of range */ pid = pid_max - 100; else if (pid < 2) /* NOP */ pid = 0; else if (pid < 100) /* Make it reasonable */ pid = 100; randompid = pid; } sx_xunlock(&allproc_lock); return (error); } SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); static int fork_findpid(int flags) { struct proc *p; int trypid; static int pidchecked = 0; /* * Requires allproc_lock in order to iterate over the list * of processes, and proctree_lock to access p_pgrp. */ sx_assert(&allproc_lock, SX_LOCKED); sx_assert(&proctree_lock, SX_LOCKED); /* * Find an unused process ID. We remember a range of unused IDs * ready to use (from lastpid+1 through pidchecked-1). * * If RFHIGHPID is set (used during system boot), do not allocate * low-numbered pids. */ trypid = lastpid + 1; if (flags & RFHIGHPID) { if (trypid < 10) trypid = 10; } else { if (randompid) trypid += arc4random() % randompid; } retry: /* * If the process ID prototype has wrapped around, * restart somewhat above 0, as the low-numbered procs * tend to include daemons that don't exit. */ if (trypid >= pid_max) { trypid = trypid % pid_max; if (trypid < 100) trypid += 100; pidchecked = 0; } if (trypid >= pidchecked) { int doingzomb = 0; pidchecked = PID_MAX; /* * Scan the active and zombie procs to check whether this pid * is in use. Remember the lowest pid that's greater * than trypid, so we can avoid checking for a while. * * Avoid reuse of the process group id, session id or * the reaper subtree id. Note that for process group * and sessions, the amount of reserved pids is * limited by process limit. For the subtree ids, the * id is kept reserved only while there is a * non-reaped process in the subtree, so amount of * reserved pids is limited by process limit times * two. */ p = LIST_FIRST(&allproc); again: for (; p != NULL; p = LIST_NEXT(p, p_list)) { while (p->p_pid == trypid || p->p_reapsubtree == trypid || (p->p_pgrp != NULL && (p->p_pgrp->pg_id == trypid || (p->p_session != NULL && p->p_session->s_sid == trypid)))) { trypid++; if (trypid >= pidchecked) goto retry; } if (p->p_pid > trypid && pidchecked > p->p_pid) pidchecked = p->p_pid; if (p->p_pgrp != NULL) { if (p->p_pgrp->pg_id > trypid && pidchecked > p->p_pgrp->pg_id) pidchecked = p->p_pgrp->pg_id; if (p->p_session != NULL && p->p_session->s_sid > trypid && pidchecked > p->p_session->s_sid) pidchecked = p->p_session->s_sid; } } if (!doingzomb) { doingzomb = 1; p = LIST_FIRST(&zombproc); goto again; } } /* * RFHIGHPID does not mess with the lastpid counter during boot. */ if (flags & RFHIGHPID) pidchecked = 0; else lastpid = trypid; return (trypid); } static int fork_norfproc(struct thread *td, int flags) { int error; struct proc *p1; KASSERT((flags & RFPROC) == 0, ("fork_norfproc called with RFPROC set")); p1 = td->td_proc; if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && (flags & (RFCFDG | RFFDG))) { PROC_LOCK(p1); if (thread_single(p1, SINGLE_BOUNDARY)) { PROC_UNLOCK(p1); return (ERESTART); } PROC_UNLOCK(p1); } error = vm_forkproc(td, NULL, NULL, NULL, flags); if (error) goto fail; /* * Close all file descriptors. */ if (flags & RFCFDG) { struct filedesc *fdtmp; fdtmp = fdinit(td->td_proc->p_fd, false); fdescfree(td); p1->p_fd = fdtmp; } /* * Unshare file descriptors (from parent). */ if (flags & RFFDG) fdunshare(td); fail: if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && (flags & (RFCFDG | RFFDG))) { PROC_LOCK(p1); thread_single_end(p1, SINGLE_BOUNDARY); PROC_UNLOCK(p1); } return (error); } static void do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2, struct vmspace *vm2, struct file *fp_procdesc) { struct proc *p1, *pptr; int trypid; struct filedesc *fd; struct filedesc_to_leader *fdtol; struct sigacts *newsigacts; sx_assert(&proctree_lock, SX_SLOCKED); sx_assert(&allproc_lock, SX_XLOCKED); p1 = td->td_proc; trypid = fork_findpid(fr->fr_flags); sx_sunlock(&proctree_lock); p2->p_state = PRS_NEW; /* protect against others */ p2->p_pid = trypid; AUDIT_ARG_PID(p2->p_pid); LIST_INSERT_HEAD(&allproc, p2, p_list); allproc_gen++; LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); tidhash_add(td2); PROC_LOCK(p2); PROC_LOCK(p1); sx_xunlock(&allproc_lock); bcopy(&p1->p_startcopy, &p2->p_startcopy, __rangeof(struct proc, p_startcopy, p_endcopy)); pargs_hold(p2->p_args); PROC_UNLOCK(p1); bzero(&p2->p_startzero, __rangeof(struct proc, p_startzero, p_endzero)); /* Tell the prison that we exist. */ prison_proc_hold(p2->p_ucred->cr_prison); PROC_UNLOCK(p2); /* * Malloc things while we don't hold any locks. */ if (fr->fr_flags & RFSIGSHARE) newsigacts = NULL; else newsigacts = sigacts_alloc(); /* * Copy filedesc. */ if (fr->fr_flags & RFCFDG) { fd = fdinit(p1->p_fd, false); fdtol = NULL; } else if (fr->fr_flags & RFFDG) { fd = fdcopy(p1->p_fd); fdtol = NULL; } else { fd = fdshare(p1->p_fd); if (p1->p_fdtol == NULL) p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL, p1->p_leader); if ((fr->fr_flags & RFTHREAD) != 0) { /* * Shared file descriptor table, and shared * process leaders. */ fdtol = p1->p_fdtol; FILEDESC_XLOCK(p1->p_fd); fdtol->fdl_refcount++; FILEDESC_XUNLOCK(p1->p_fd); } else { /* * Shared file descriptor table, and different * process leaders. */ fdtol = filedesc_to_leader_alloc(p1->p_fdtol, p1->p_fd, p2); } } /* * Make a proc table entry for the new process. * Start by zeroing the section of proc that is zero-initialized, * then copy the section that is copied directly from the parent. */ PROC_LOCK(p2); PROC_LOCK(p1); bzero(&td2->td_startzero, __rangeof(struct thread, td_startzero, td_endzero)); bcopy(&td->td_startcopy, &td2->td_startcopy, __rangeof(struct thread, td_startcopy, td_endcopy)); bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name)); td2->td_sigstk = td->td_sigstk; td2->td_flags = TDF_INMEM; td2->td_lend_user_pri = PRI_MAX; #ifdef VIMAGE td2->td_vnet = NULL; td2->td_vnet_lpush = NULL; #endif /* * Allow the scheduler to initialize the child. */ thread_lock(td); sched_fork(td, td2); thread_unlock(td); /* * Duplicate sub-structures as needed. * Increase reference counts on shared objects. */ p2->p_flag = P_INMEM; p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC); p2->p_swtick = ticks; if (p1->p_flag & P_PROFIL) startprofclock(p2); /* * Whilst the proc lock is held, copy the VM domain data out * using the VM domain method. */ vm_domain_policy_init(&p2->p_vm_dom_policy); vm_domain_policy_localcopy(&p2->p_vm_dom_policy, &p1->p_vm_dom_policy); if (fr->fr_flags & RFSIGSHARE) { p2->p_sigacts = sigacts_hold(p1->p_sigacts); } else { sigacts_copy(newsigacts, p1->p_sigacts); p2->p_sigacts = newsigacts; } if (fr->fr_flags & RFTSIGZMB) p2->p_sigparent = RFTSIGNUM(fr->fr_flags); else if (fr->fr_flags & RFLINUXTHPN) p2->p_sigparent = SIGUSR1; else p2->p_sigparent = SIGCHLD; p2->p_textvp = p1->p_textvp; p2->p_fd = fd; p2->p_fdtol = fdtol; if (p1->p_flag2 & P2_INHERIT_PROTECTED) { p2->p_flag |= P_PROTECTED; p2->p_flag2 |= P2_INHERIT_PROTECTED; } /* * p_limit is copy-on-write. Bump its refcount. */ lim_fork(p1, p2); thread_cow_get_proc(td2, p2); pstats_fork(p1->p_stats, p2->p_stats); PROC_UNLOCK(p1); PROC_UNLOCK(p2); /* Bump references to the text vnode (for procfs). */ if (p2->p_textvp) vref(p2->p_textvp); /* * Set up linkage for kernel based threading. */ if ((fr->fr_flags & RFTHREAD) != 0) { mtx_lock(&ppeers_lock); p2->p_peers = p1->p_peers; p1->p_peers = p2; p2->p_leader = p1->p_leader; mtx_unlock(&ppeers_lock); PROC_LOCK(p1->p_leader); if ((p1->p_leader->p_flag & P_WEXIT) != 0) { PROC_UNLOCK(p1->p_leader); /* * The task leader is exiting, so process p1 is * going to be killed shortly. Since p1 obviously * isn't dead yet, we know that the leader is either * sending SIGKILL's to all the processes in this * task or is sleeping waiting for all the peers to * exit. We let p1 complete the fork, but we need * to go ahead and kill the new process p2 since * the task leader may not get a chance to send * SIGKILL to it. We leave it on the list so that * the task leader will wait for this new process * to commit suicide. */ PROC_LOCK(p2); kern_psignal(p2, SIGKILL); PROC_UNLOCK(p2); } else PROC_UNLOCK(p1->p_leader); } else { p2->p_peers = NULL; p2->p_leader = p2; } sx_xlock(&proctree_lock); PGRP_LOCK(p1->p_pgrp); PROC_LOCK(p2); PROC_LOCK(p1); /* * Preserve some more flags in subprocess. P_PROFIL has already * been preserved. */ p2->p_flag |= p1->p_flag & P_SUGID; td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING; SESS_LOCK(p1->p_session); if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT) p2->p_flag |= P_CONTROLT; SESS_UNLOCK(p1->p_session); if (fr->fr_flags & RFPPWAIT) p2->p_flag |= P_PPWAIT; p2->p_pgrp = p1->p_pgrp; LIST_INSERT_AFTER(p1, p2, p_pglist); PGRP_UNLOCK(p1->p_pgrp); LIST_INIT(&p2->p_children); LIST_INIT(&p2->p_orphans); callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0); /* * If PF_FORK is set, the child process inherits the * procfs ioctl flags from its parent. */ if (p1->p_pfsflags & PF_FORK) { p2->p_stops = p1->p_stops; p2->p_pfsflags = p1->p_pfsflags; } /* * This begins the section where we must prevent the parent * from being swapped. */ _PHOLD(p1); PROC_UNLOCK(p1); /* * Attach the new process to its parent. * * If RFNOWAIT is set, the newly created process becomes a child * of init. This effectively disassociates the child from the * parent. */ if ((fr->fr_flags & RFNOWAIT) != 0) { pptr = p1->p_reaper; p2->p_reaper = pptr; } else { p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ? p1 : p1->p_reaper; pptr = p1; } p2->p_pptr = pptr; LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); LIST_INIT(&p2->p_reaplist); LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling); if (p2->p_reaper == p1) p2->p_reapsubtree = p2->p_pid; sx_xunlock(&proctree_lock); /* Inform accounting that we have forked. */ p2->p_acflag = AFORK; PROC_UNLOCK(p2); #ifdef KTRACE ktrprocfork(p1, p2); #endif /* * Finish creating the child process. It will return via a different * execution path later. (ie: directly into user mode) */ vm_forkproc(td, p2, td2, vm2, fr->fr_flags); if (fr->fr_flags == (RFFDG | RFPROC)) { PCPU_INC(cnt.v_forks); PCPU_ADD(cnt.v_forkpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) { PCPU_INC(cnt.v_vforks); PCPU_ADD(cnt.v_vforkpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } else if (p1 == &proc0) { PCPU_INC(cnt.v_kthreads); PCPU_ADD(cnt.v_kthreadpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } else { PCPU_INC(cnt.v_rforks); PCPU_ADD(cnt.v_rforkpages, p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize); } /* * Associate the process descriptor with the process before anything * can happen that might cause that process to need the descriptor. * However, don't do this until after fork(2) can no longer fail. */ if (fr->fr_flags & RFPROCDESC) procdesc_new(p2, fr->fr_pd_flags); /* * Both processes are set up, now check if any loadable modules want * to adjust anything. */ EVENTHANDLER_INVOKE(process_fork, p1, p2, fr->fr_flags); /* * Set the child start time and mark the process as being complete. */ PROC_LOCK(p2); PROC_LOCK(p1); microuptime(&p2->p_stats->p_start); PROC_SLOCK(p2); p2->p_state = PRS_NORMAL; PROC_SUNLOCK(p2); #ifdef KDTRACE_HOOKS /* * Tell the DTrace fasttrap provider about the new process so that any * tracepoints inherited from the parent can be removed. We have to do * this only after p_state is PRS_NORMAL since the fasttrap module will * use pfind() later on. */ if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork) dtrace_fasttrap_fork(p1, p2); #endif /* * Hold the process so that it cannot exit after we make it runnable, * but before we wait for the debugger. */ _PHOLD(p2); if ((p1->p_flag & (P_TRACED | P_FOLLOWFORK)) == (P_TRACED | P_FOLLOWFORK)) { /* * Arrange for debugger to receive the fork event. * * We can report PL_FLAG_FORKED regardless of * P_FOLLOWFORK settings, but it does not make a sense * for runaway child. */ td->td_dbgflags |= TDB_FORK; td->td_dbg_forked = p2->p_pid; td2->td_dbgflags |= TDB_STOPATFORK; } if (fr->fr_flags & RFPPWAIT) { td->td_pflags |= TDP_RFPPWAIT; td->td_rfppwait_p = p2; } PROC_UNLOCK(p2); /* * Now can be swapped. */ _PRELE(p1); PROC_UNLOCK(p1); /* * Tell any interested parties about the new process. */ - knote_fork(&p1->p_klist, p2->p_pid); + knote_fork(p1->p_klist, p2->p_pid); SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags); if (fr->fr_flags & RFPROCDESC) { procdesc_finit(p2->p_procdesc, fp_procdesc); fdrop(fp_procdesc, td); } if ((fr->fr_flags & RFSTOPPED) == 0) { /* * If RFSTOPPED not requested, make child runnable and * add to run queue. */ thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); thread_unlock(td2); if (fr->fr_pidp != NULL) *fr->fr_pidp = p2->p_pid; } else { *fr->fr_procp = p2; } PROC_LOCK(p2); /* * Wait until debugger is attached to child. */ while (td2->td_proc == p2 && (td2->td_dbgflags & TDB_STOPATFORK) != 0) cv_wait(&p2->p_dbgwait, &p2->p_mtx); _PRELE(p2); racct_proc_fork_done(p2); PROC_UNLOCK(p2); } int fork1(struct thread *td, struct fork_req *fr) { struct proc *p1, *newproc; struct thread *td2; struct vmspace *vm2; struct file *fp_procdesc; vm_ooffset_t mem_charged; int error, nprocs_new, ok; static int curfail; static struct timeval lastfail; int flags, pages; flags = fr->fr_flags; pages = fr->fr_pages; if ((flags & RFSTOPPED) != 0) MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL); else MPASS(fr->fr_procp == NULL); /* Check for the undefined or unimplemented flags. */ if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0) return (EINVAL); /* Signal value requires RFTSIGZMB. */ if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0) return (EINVAL); /* Can't copy and clear. */ if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG)) return (EINVAL); /* Check the validity of the signal number. */ if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG) return (EINVAL); if ((flags & RFPROCDESC) != 0) { /* Can't not create a process yet get a process descriptor. */ if ((flags & RFPROC) == 0) return (EINVAL); /* Must provide a place to put a procdesc if creating one. */ if (fr->fr_pd_fd == NULL) return (EINVAL); /* Check if we are using supported flags. */ if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0) return (EINVAL); } p1 = td->td_proc; /* * Here we don't create a new process, but we divorce * certain parts of a process from itself. */ if ((flags & RFPROC) == 0) { if (fr->fr_procp != NULL) *fr->fr_procp = NULL; else if (fr->fr_pidp != NULL) *fr->fr_pidp = 0; return (fork_norfproc(td, flags)); } fp_procdesc = NULL; newproc = NULL; vm2 = NULL; /* * Increment the nprocs resource before allocations occur. * Although process entries are dynamically created, we still * keep a global limit on the maximum number we will * create. There are hard-limits as to the number of processes * that can run, established by the KVA and memory usage for * the process data. * * Don't allow a nonprivileged user to use the last ten * processes; don't let root exceed the limit. */ nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1; if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred, PRIV_MAXPROC, 0) != 0) || nprocs_new >= maxproc) { error = EAGAIN; sx_xlock(&allproc_lock); if (ppsratecheck(&lastfail, &curfail, 1)) { printf("maxproc limit exceeded by uid %u (pid %d); " "see tuning(7) and login.conf(5)\n", td->td_ucred->cr_ruid, p1->p_pid); } sx_xunlock(&allproc_lock); goto fail2; } /* * If required, create a process descriptor in the parent first; we * will abandon it if something goes wrong. We don't finit() until * later. */ if (flags & RFPROCDESC) { error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd, fr->fr_pd_flags, fr->fr_pd_fcaps); if (error != 0) goto fail2; } mem_charged = 0; if (pages == 0) pages = kstack_pages; /* Allocate new proc. */ newproc = uma_zalloc(proc_zone, M_WAITOK); td2 = FIRST_THREAD_IN_PROC(newproc); if (td2 == NULL) { td2 = thread_alloc(pages); if (td2 == NULL) { error = ENOMEM; goto fail2; } proc_linkup(newproc, td2); } else { if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) { if (td2->td_kstack != 0) vm_thread_dispose(td2); if (!thread_alloc_stack(td2, pages)) { error = ENOMEM; goto fail2; } } } if ((flags & RFMEM) == 0) { vm2 = vmspace_fork(p1->p_vmspace, &mem_charged); if (vm2 == NULL) { error = ENOMEM; goto fail2; } if (!swap_reserve(mem_charged)) { /* * The swap reservation failed. The accounting * from the entries of the copied vm2 will be * subtracted in vmspace_free(), so force the * reservation there. */ swap_reserve_force(mem_charged); error = ENOMEM; goto fail2; } } else vm2 = NULL; /* * XXX: This is ugly; when we copy resource usage, we need to bump * per-cred resource counters. */ proc_set_cred_init(newproc, crhold(td->td_ucred)); /* * Initialize resource accounting for the child process. */ error = racct_proc_fork(p1, newproc); if (error != 0) { error = EAGAIN; goto fail1; } #ifdef MAC mac_proc_init(newproc); #endif - knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx); + newproc->p_klist = knlist_alloc(&newproc->p_mtx); STAILQ_INIT(&newproc->p_ktr); /* We have to lock the process tree while we look for a pid. */ sx_slock(&proctree_lock); sx_xlock(&allproc_lock); /* * Increment the count of procs running with this uid. Don't allow * a nonprivileged user to exceed their current limit. * * XXXRW: Can we avoid privilege here if it's not needed? */ error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0); if (error == 0) ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0); else { ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_NPROC)); } if (ok) { do_fork(td, fr, newproc, td2, vm2, fp_procdesc); return (0); } error = EAGAIN; sx_sunlock(&proctree_lock); sx_xunlock(&allproc_lock); #ifdef MAC mac_proc_destroy(newproc); #endif racct_proc_exit(newproc); fail1: crfree(newproc->p_ucred); newproc->p_ucred = NULL; fail2: if (vm2 != NULL) vmspace_free(vm2); uma_zfree(proc_zone, newproc); if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) { fdclose(td, fp_procdesc, *fr->fr_pd_fd); fdrop(fp_procdesc, td); } atomic_add_int(&nprocs, -1); pause("fork", hz / 2); return (error); } /* * Handle the return of a child process from fork1(). This function * is called from the MD fork_trampoline() entry point. */ void fork_exit(void (*callout)(void *, struct trapframe *), void *arg, struct trapframe *frame) { struct proc *p; struct thread *td; struct thread *dtd; td = curthread; p = td->td_proc; KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new")); CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)", td, td_get_sched(td), p->p_pid, td->td_name); sched_fork_exit(td); /* * Processes normally resume in mi_switch() after being * cpu_switch()'ed to, but when children start up they arrive here * instead, so we must do much the same things as mi_switch() would. */ if ((dtd = PCPU_GET(deadthread))) { PCPU_SET(deadthread, NULL); thread_stash(dtd); } thread_unlock(td); /* * cpu_fork_kthread_handler intercepts this function call to * have this call a non-return function to stay in kernel mode. * initproc has its own fork handler, but it does return. */ KASSERT(callout != NULL, ("NULL callout in fork_exit")); callout(arg, frame); /* * Check if a kernel thread misbehaved and returned from its main * function. */ if (p->p_flag & P_KPROC) { printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n", td->td_name, p->p_pid); kthread_exit(); } mtx_assert(&Giant, MA_NOTOWNED); if (p->p_sysent->sv_schedtail != NULL) (p->p_sysent->sv_schedtail)(td); td->td_pflags &= ~TDP_FORKING; } /* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. Giant is not held on entry, and must not * be held on return. This function is passed in to fork_exit() as the * first parameter and is called when returning to a new userland process. */ void fork_return(struct thread *td, struct trapframe *frame) { struct proc *p, *dbg; p = td->td_proc; if (td->td_dbgflags & TDB_STOPATFORK) { sx_xlock(&proctree_lock); PROC_LOCK(p); if ((p->p_pptr->p_flag & (P_TRACED | P_FOLLOWFORK)) == (P_TRACED | P_FOLLOWFORK)) { /* * If debugger still wants auto-attach for the * parent's children, do it now. */ dbg = p->p_pptr->p_pptr; p->p_flag |= P_TRACED; p->p_oppid = p->p_pptr->p_pid; CTR2(KTR_PTRACE, "fork_return: attaching to new child pid %d: oppid %d", p->p_pid, p->p_oppid); proc_reparent(p, dbg); sx_xunlock(&proctree_lock); td->td_dbgflags |= TDB_CHILD | TDB_SCX; ptracestop(td, SIGSTOP); td->td_dbgflags &= ~(TDB_CHILD | TDB_SCX); } else { /* * ... otherwise clear the request. */ sx_xunlock(&proctree_lock); td->td_dbgflags &= ~TDB_STOPATFORK; cv_broadcast(&p->p_dbgwait); } PROC_UNLOCK(p); } else if (p->p_flag & P_TRACED || td->td_dbgflags & TDB_BORN) { /* * This is the start of a new thread in a traced * process. Report a system call exit event. */ PROC_LOCK(p); td->td_dbgflags |= TDB_SCX; _STOPEVENT(p, S_SCX, td->td_dbg_sc_code); if ((p->p_stops & S_PT_SCX) != 0 || (td->td_dbgflags & TDB_BORN) != 0) ptracestop(td, SIGTRAP); td->td_dbgflags &= ~(TDB_SCX | TDB_BORN); PROC_UNLOCK(p); } userret(td, frame); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(SYS_fork, 0, 0); #endif } Index: projects/vnet/sys/kern/kern_ntptime.c =================================================================== --- projects/vnet/sys/kern/kern_ntptime.c (revision 302276) +++ projects/vnet/sys/kern/kern_ntptime.c (revision 302277) @@ -1,1057 +1,1083 @@ /*- *********************************************************************** * * * Copyright (c) David L. Mills 1993-2001 * * * * Permission to use, copy, modify, and distribute this software and * * its documentation for any purpose and without fee is hereby * * granted, provided that the above copyright notice appears in all * * copies and that both the copyright notice and this permission * * notice appear in supporting documentation, and that the name * * University of Delaware not be used in advertising or publicity * * pertaining to distribution of the software without specific, * * written prior permission. The University of Delaware makes no * * representations about the suitability this software for any * * purpose. It is provided "as is" without express or implied * * warranty. * * * **********************************************************************/ /* * Adapted from the original sources for FreeBSD and timecounters by: * Poul-Henning Kamp . * * The 32bit version of the "LP" macros seems a bit past its "sell by" * date so I have retained only the 64bit version and included it directly * in this file. * * Only minor changes done to interface with the timecounters over in * sys/kern/kern_clock.c. Some of the comments below may be (even more) * confusing and/or plain wrong in that context. */ #include __FBSDID("$FreeBSD$"); #include "opt_ntp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PPS_SYNC FEATURE(pps_sync, "Support usage of external PPS signal by kernel PLL"); #endif /* * Single-precision macros for 64-bit machines */ typedef int64_t l_fp; #define L_ADD(v, u) ((v) += (u)) #define L_SUB(v, u) ((v) -= (u)) #define L_ADDHI(v, a) ((v) += (int64_t)(a) << 32) #define L_NEG(v) ((v) = -(v)) #define L_RSHIFT(v, n) \ do { \ if ((v) < 0) \ (v) = -(-(v) >> (n)); \ else \ (v) = (v) >> (n); \ } while (0) #define L_MPY(v, a) ((v) *= (a)) #define L_CLR(v) ((v) = 0) #define L_ISNEG(v) ((v) < 0) #define L_LINT(v, a) ((v) = (int64_t)(a) << 32) #define L_GINT(v) ((v) < 0 ? -(-(v) >> 32) : (v) >> 32) /* * Generic NTP kernel interface * * These routines constitute the Network Time Protocol (NTP) interfaces * for user and daemon application programs. The ntp_gettime() routine * provides the time, maximum error (synch distance) and estimated error * (dispersion) to client user application programs. The ntp_adjtime() * routine is used by the NTP daemon to adjust the system clock to an * externally derived time. The time offset and related variables set by * this routine are used by other routines in this module to adjust the * phase and frequency of the clock discipline loop which controls the * system clock. * * When the kernel time is reckoned directly in nanoseconds (NTP_NANO * defined), the time at each tick interrupt is derived directly from * the kernel time variable. When the kernel time is reckoned in * microseconds, (NTP_NANO undefined), the time is derived from the * kernel time variable together with a variable representing the * leftover nanoseconds at the last tick interrupt. In either case, the * current nanosecond time is reckoned from these values plus an * interpolated value derived by the clock routines in another * architecture-specific module. The interpolation can use either a * dedicated counter or a processor cycle counter (PCC) implemented in * some architectures. * * Note that all routines must run at priority splclock or higher. */ /* * Phase/frequency-lock loop (PLL/FLL) definitions * * The nanosecond clock discipline uses two variable types, time * variables and frequency variables. Both types are represented as 64- * bit fixed-point quantities with the decimal point between two 32-bit * halves. On a 32-bit machine, each half is represented as a single * word and mathematical operations are done using multiple-precision * arithmetic. On a 64-bit machine, ordinary computer arithmetic is * used. * * A time variable is a signed 64-bit fixed-point number in ns and * fraction. It represents the remaining time offset to be amortized * over succeeding tick interrupts. The maximum time offset is about * 0.5 s and the resolution is about 2.3e-10 ns. * * 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |s s s| ns | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | fraction | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * A frequency variable is a signed 64-bit fixed-point number in ns/s * and fraction. It represents the ns and fraction to be added to the * kernel time variable at each second. The maximum frequency offset is * about +-500000 ns/s and the resolution is about 2.3e-10 ns/s. * * 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |s s s s s s s s s s s s s| ns/s | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | fraction | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ /* * The following variables establish the state of the PLL/FLL and the * residual time and frequency offset of the local clock. */ #define SHIFT_PLL 4 /* PLL loop gain (shift) */ #define SHIFT_FLL 2 /* FLL loop gain (shift) */ static int time_state = TIME_OK; /* clock state */ int time_status = STA_UNSYNC; /* clock status bits */ static long time_tai; /* TAI offset (s) */ static long time_monitor; /* last time offset scaled (ns) */ static long time_constant; /* poll interval (shift) (s) */ static long time_precision = 1; /* clock precision (ns) */ static long time_maxerror = MAXPHASE / 1000; /* maximum error (us) */ long time_esterror = MAXPHASE / 1000; /* estimated error (us) */ static long time_reftime; /* uptime at last adjustment (s) */ static l_fp time_offset; /* time offset (ns) */ static l_fp time_freq; /* frequency offset (ns/s) */ static l_fp time_adj; /* tick adjust (ns/s) */ static int64_t time_adjtime; /* correction from adjtime(2) (usec) */ +static struct mtx ntpadj_lock; +MTX_SYSINIT(ntpadj, &ntpadj_lock, "ntpadj", #ifdef PPS_SYNC + MTX_SPIN +#else + MTX_DEF +#endif +); + /* + * When PPS_SYNC is defined, hardpps() function is provided which can + * be legitimately called from interrupt filters. Due to this, use + * spinlock for ntptime state protection, otherwise sleepable mutex is + * adequate. + */ +#ifdef PPS_SYNC +#define NTPADJ_LOCK() mtx_lock_spin(&ntpadj_lock) +#define NTPADJ_UNLOCK() mtx_unlock_spin(&ntpadj_lock) +#else +#define NTPADJ_LOCK() mtx_lock(&ntpadj_lock) +#define NTPADJ_UNLOCK() mtx_unlock(&ntpadj_lock) +#endif +#define NTPADJ_ASSERT_LOCKED() mtx_assert(&ntpadj_lock, MA_OWNED) + +#ifdef PPS_SYNC +/* * The following variables are used when a pulse-per-second (PPS) signal * is available and connected via a modem control lead. They establish * the engineering parameters of the clock discipline loop when * controlled by the PPS signal. */ #define PPS_FAVG 2 /* min freq avg interval (s) (shift) */ #define PPS_FAVGDEF 8 /* default freq avg int (s) (shift) */ #define PPS_FAVGMAX 15 /* max freq avg interval (s) (shift) */ #define PPS_PAVG 4 /* phase avg interval (s) (shift) */ #define PPS_VALID 120 /* PPS signal watchdog max (s) */ #define PPS_MAXWANDER 100000 /* max PPS wander (ns/s) */ #define PPS_POPCORN 2 /* popcorn spike threshold (shift) */ static struct timespec pps_tf[3]; /* phase median filter */ static l_fp pps_freq; /* scaled frequency offset (ns/s) */ static long pps_fcount; /* frequency accumulator */ static long pps_jitter; /* nominal jitter (ns) */ static long pps_stabil; /* nominal stability (scaled ns/s) */ static long pps_lastsec; /* time at last calibration (s) */ static int pps_valid; /* signal watchdog counter */ static int pps_shift = PPS_FAVG; /* interval duration (s) (shift) */ static int pps_shiftmax = PPS_FAVGDEF; /* max interval duration (s) (shift) */ static int pps_intcnt; /* wander counter */ /* * PPS signal quality monitors */ static long pps_calcnt; /* calibration intervals */ static long pps_jitcnt; /* jitter limit exceeded */ static long pps_stbcnt; /* stability limit exceeded */ static long pps_errcnt; /* calibration errors */ #endif /* PPS_SYNC */ /* * End of phase/frequency-lock loop (PLL/FLL) definitions */ static void ntp_init(void); static void hardupdate(long offset); static void ntp_gettime1(struct ntptimeval *ntvp); -static int ntp_is_time_error(void); +static bool ntp_is_time_error(int tsl); -static int -ntp_is_time_error(void) +static bool +ntp_is_time_error(int tsl) { + /* * Status word error decode. If any of these conditions occur, * an error is returned, instead of the status word. Most * applications will care only about the fact the system clock * may not be trusted, not about the details. * * Hardware or software error */ - if ((time_status & (STA_UNSYNC | STA_CLOCKERR)) || + if ((tsl & (STA_UNSYNC | STA_CLOCKERR)) || /* * PPS signal lost when either time or frequency synchronization * requested */ - (time_status & (STA_PPSFREQ | STA_PPSTIME) && - !(time_status & STA_PPSSIGNAL)) || + (tsl & (STA_PPSFREQ | STA_PPSTIME) && + !(tsl & STA_PPSSIGNAL)) || /* * PPS jitter exceeded when time synchronization requested */ - (time_status & STA_PPSTIME && - time_status & STA_PPSJITTER) || + (tsl & STA_PPSTIME && tsl & STA_PPSJITTER) || /* * PPS wander exceeded or calibration error when frequency * synchronization requested */ - (time_status & STA_PPSFREQ && - time_status & (STA_PPSWANDER | STA_PPSERROR))) - return (1); + (tsl & STA_PPSFREQ && + tsl & (STA_PPSWANDER | STA_PPSERROR))) + return (true); - return (0); + return (false); } static void ntp_gettime1(struct ntptimeval *ntvp) { struct timespec atv; /* nanosecond time */ - GIANT_REQUIRED; + NTPADJ_ASSERT_LOCKED(); nanotime(&atv); ntvp->time.tv_sec = atv.tv_sec; ntvp->time.tv_nsec = atv.tv_nsec; ntvp->maxerror = time_maxerror; ntvp->esterror = time_esterror; ntvp->tai = time_tai; ntvp->time_state = time_state; - if (ntp_is_time_error()) + if (ntp_is_time_error(time_status)) ntvp->time_state = TIME_ERROR; } /* * ntp_gettime() - NTP user application interface * * See the timex.h header file for synopsis and API description. Note that * the TAI offset is returned in the ntvtimeval.tai structure member. */ #ifndef _SYS_SYSPROTO_H_ struct ntp_gettime_args { struct ntptimeval *ntvp; }; #endif /* ARGSUSED */ int sys_ntp_gettime(struct thread *td, struct ntp_gettime_args *uap) { struct ntptimeval ntv; - mtx_lock(&Giant); + NTPADJ_LOCK(); ntp_gettime1(&ntv); - mtx_unlock(&Giant); + NTPADJ_UNLOCK(); td->td_retval[0] = ntv.time_state; return (copyout(&ntv, uap->ntvp, sizeof(ntv))); } static int ntp_sysctl(SYSCTL_HANDLER_ARGS) { struct ntptimeval ntv; /* temporary structure */ + NTPADJ_LOCK(); ntp_gettime1(&ntv); + NTPADJ_UNLOCK(); return (sysctl_handle_opaque(oidp, &ntv, sizeof(ntv), req)); } SYSCTL_NODE(_kern, OID_AUTO, ntp_pll, CTLFLAG_RW, 0, ""); -SYSCTL_PROC(_kern_ntp_pll, OID_AUTO, gettime, CTLTYPE_OPAQUE|CTLFLAG_RD, - 0, sizeof(struct ntptimeval) , ntp_sysctl, "S,ntptimeval", ""); +SYSCTL_PROC(_kern_ntp_pll, OID_AUTO, gettime, CTLTYPE_OPAQUE | CTLFLAG_RD | + CTLFLAG_MPSAFE, 0, sizeof(struct ntptimeval) , ntp_sysctl, "S,ntptimeval", + ""); #ifdef PPS_SYNC SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shiftmax, CTLFLAG_RW, &pps_shiftmax, 0, "Max interval duration (sec) (shift)"); SYSCTL_INT(_kern_ntp_pll, OID_AUTO, pps_shift, CTLFLAG_RW, &pps_shift, 0, "Interval duration (sec) (shift)"); SYSCTL_LONG(_kern_ntp_pll, OID_AUTO, time_monitor, CTLFLAG_RD, &time_monitor, 0, "Last time offset scaled (ns)"); -SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, pps_freq, CTLFLAG_RD, - &pps_freq, sizeof(pps_freq), "I", "Scaled frequency offset (ns/sec)"); -SYSCTL_OPAQUE(_kern_ntp_pll, OID_AUTO, time_freq, CTLFLAG_RD, - &time_freq, sizeof(time_freq), "I", "Frequency offset (ns/sec)"); +SYSCTL_S64(_kern_ntp_pll, OID_AUTO, pps_freq, CTLFLAG_RD | CTLFLAG_MPSAFE, + &pps_freq, 0, + "Scaled frequency offset (ns/sec)"); +SYSCTL_S64(_kern_ntp_pll, OID_AUTO, time_freq, CTLFLAG_RD | CTLFLAG_MPSAFE, + &time_freq, 0, + "Frequency offset (ns/sec)"); #endif /* * ntp_adjtime() - NTP daemon application interface * * See the timex.h header file for synopsis and API description. Note that * the timex.constant structure member has a dual purpose to set the time * constant and to set the TAI offset. */ #ifndef _SYS_SYSPROTO_H_ struct ntp_adjtime_args { struct timex *tp; }; #endif int sys_ntp_adjtime(struct thread *td, struct ntp_adjtime_args *uap) { struct timex ntv; /* temporary structure */ long freq; /* frequency ns/s) */ int modes; /* mode bits from structure */ - int s; /* caller priority */ - int error; + int error, retval; error = copyin((caddr_t)uap->tp, (caddr_t)&ntv, sizeof(ntv)); if (error) - return(error); + return (error); /* * Update selected clock variables - only the superuser can * change anything. Note that there is no error checking here on * the assumption the superuser should know what it is doing. * Note that either the time constant or TAI offset are loaded * from the ntv.constant member, depending on the mode bits. If * the STA_PLL bit in the status word is cleared, the state and * status words are reset to the initial values at boot. */ - mtx_lock(&Giant); modes = ntv.modes; if (modes) error = priv_check(td, PRIV_NTP_ADJTIME); - if (error) - goto done2; - s = splclock(); + if (error != 0) + return (error); + NTPADJ_LOCK(); if (modes & MOD_MAXERROR) time_maxerror = ntv.maxerror; if (modes & MOD_ESTERROR) time_esterror = ntv.esterror; if (modes & MOD_STATUS) { if (time_status & STA_PLL && !(ntv.status & STA_PLL)) { time_state = TIME_OK; time_status = STA_UNSYNC; #ifdef PPS_SYNC pps_shift = PPS_FAVG; #endif /* PPS_SYNC */ } time_status &= STA_RONLY; time_status |= ntv.status & ~STA_RONLY; } if (modes & MOD_TIMECONST) { if (ntv.constant < 0) time_constant = 0; else if (ntv.constant > MAXTC) time_constant = MAXTC; else time_constant = ntv.constant; } if (modes & MOD_TAI) { if (ntv.constant > 0) /* XXX zero & negative numbers ? */ time_tai = ntv.constant; } #ifdef PPS_SYNC if (modes & MOD_PPSMAX) { if (ntv.shift < PPS_FAVG) pps_shiftmax = PPS_FAVG; else if (ntv.shift > PPS_FAVGMAX) pps_shiftmax = PPS_FAVGMAX; else pps_shiftmax = ntv.shift; } #endif /* PPS_SYNC */ if (modes & MOD_NANO) time_status |= STA_NANO; if (modes & MOD_MICRO) time_status &= ~STA_NANO; if (modes & MOD_CLKB) time_status |= STA_CLK; if (modes & MOD_CLKA) time_status &= ~STA_CLK; if (modes & MOD_FREQUENCY) { freq = (ntv.freq * 1000LL) >> 16; if (freq > MAXFREQ) L_LINT(time_freq, MAXFREQ); else if (freq < -MAXFREQ) L_LINT(time_freq, -MAXFREQ); else { /* * ntv.freq is [PPM * 2^16] = [us/s * 2^16] * time_freq is [ns/s * 2^32] */ time_freq = ntv.freq * 1000LL * 65536LL; } #ifdef PPS_SYNC pps_freq = time_freq; #endif /* PPS_SYNC */ } if (modes & MOD_OFFSET) { if (time_status & STA_NANO) hardupdate(ntv.offset); else hardupdate(ntv.offset * 1000); } /* * Retrieve all clock variables. Note that the TAI offset is * returned only by ntp_gettime(); */ if (time_status & STA_NANO) ntv.offset = L_GINT(time_offset); else ntv.offset = L_GINT(time_offset) / 1000; /* XXX rounding ? */ ntv.freq = L_GINT((time_freq / 1000LL) << 16); ntv.maxerror = time_maxerror; ntv.esterror = time_esterror; ntv.status = time_status; ntv.constant = time_constant; if (time_status & STA_NANO) ntv.precision = time_precision; else ntv.precision = time_precision / 1000; ntv.tolerance = MAXFREQ * SCALE_PPM; #ifdef PPS_SYNC ntv.shift = pps_shift; ntv.ppsfreq = L_GINT((pps_freq / 1000LL) << 16); if (time_status & STA_NANO) ntv.jitter = pps_jitter; else ntv.jitter = pps_jitter / 1000; ntv.stabil = pps_stabil; ntv.calcnt = pps_calcnt; ntv.errcnt = pps_errcnt; ntv.jitcnt = pps_jitcnt; ntv.stbcnt = pps_stbcnt; #endif /* PPS_SYNC */ - splx(s); + retval = ntp_is_time_error(time_status) ? TIME_ERROR : time_state; + NTPADJ_UNLOCK(); error = copyout((caddr_t)&ntv, (caddr_t)uap->tp, sizeof(ntv)); - if (error) - goto done2; - - if (ntp_is_time_error()) - td->td_retval[0] = TIME_ERROR; - else - td->td_retval[0] = time_state; - -done2: - mtx_unlock(&Giant); + if (error == 0) + td->td_retval[0] = retval; return (error); } /* * second_overflow() - called after ntp_tick_adjust() * * This routine is ordinarily called immediately following the above * routine ntp_tick_adjust(). While these two routines are normally * combined, they are separated here only for the purposes of * simulation. */ void ntp_update_second(int64_t *adjustment, time_t *newsec) { int tickrate; l_fp ftemp; /* 32/64-bit temporary */ /* * On rollover of the second both the nanosecond and microsecond * clocks are updated and the state machine cranked as * necessary. The phase adjustment to be used for the next * second is calculated and the maximum error is increased by * the tolerance. */ time_maxerror += MAXFREQ / 1000; /* * Leap second processing. If in leap-insert state at * the end of the day, the system clock is set back one * second; if in leap-delete state, the system clock is * set ahead one second. The nano_time() routine or * external clock driver will insure that reported time * is always monotonic. */ switch (time_state) { /* * No warning. */ case TIME_OK: if (time_status & STA_INS) time_state = TIME_INS; else if (time_status & STA_DEL) time_state = TIME_DEL; break; /* * Insert second 23:59:60 following second * 23:59:59. */ case TIME_INS: if (!(time_status & STA_INS)) time_state = TIME_OK; else if ((*newsec) % 86400 == 0) { (*newsec)--; time_state = TIME_OOP; time_tai++; } break; /* * Delete second 23:59:59. */ case TIME_DEL: if (!(time_status & STA_DEL)) time_state = TIME_OK; else if (((*newsec) + 1) % 86400 == 0) { (*newsec)++; time_tai--; time_state = TIME_WAIT; } break; /* * Insert second in progress. */ case TIME_OOP: time_state = TIME_WAIT; break; /* * Wait for status bits to clear. */ case TIME_WAIT: if (!(time_status & (STA_INS | STA_DEL))) time_state = TIME_OK; } /* * Compute the total time adjustment for the next second * in ns. The offset is reduced by a factor depending on * whether the PPS signal is operating. Note that the * value is in effect scaled by the clock frequency, * since the adjustment is added at each tick interrupt. */ ftemp = time_offset; #ifdef PPS_SYNC /* XXX even if PPS signal dies we should finish adjustment ? */ if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL) L_RSHIFT(ftemp, pps_shift); else L_RSHIFT(ftemp, SHIFT_PLL + time_constant); #else L_RSHIFT(ftemp, SHIFT_PLL + time_constant); #endif /* PPS_SYNC */ time_adj = ftemp; L_SUB(time_offset, ftemp); L_ADD(time_adj, time_freq); /* * Apply any correction from adjtime(2). If more than one second * off we slew at a rate of 5ms/s (5000 PPM) else 500us/s (500PPM) * until the last second is slewed the final < 500 usecs. */ if (time_adjtime != 0) { if (time_adjtime > 1000000) tickrate = 5000; else if (time_adjtime < -1000000) tickrate = -5000; else if (time_adjtime > 500) tickrate = 500; else if (time_adjtime < -500) tickrate = -500; else tickrate = time_adjtime; time_adjtime -= tickrate; L_LINT(ftemp, tickrate * 1000); L_ADD(time_adj, ftemp); } *adjustment = time_adj; #ifdef PPS_SYNC if (pps_valid > 0) pps_valid--; else time_status &= ~STA_PPSSIGNAL; #endif /* PPS_SYNC */ } /* * ntp_init() - initialize variables and structures * * This routine must be called after the kernel variables hz and tick * are set or changed and before the next tick interrupt. In this * particular implementation, these values are assumed set elsewhere in * the kernel. The design allows the clock frequency and tick interval * to be changed while the system is running. So, this routine should * probably be integrated with the code that does that. */ static void -ntp_init() +ntp_init(void) { /* * The following variables are initialized only at startup. Only * those structures not cleared by the compiler need to be * initialized, and these only in the simulator. In the actual * kernel, any nonzero values here will quickly evaporate. */ L_CLR(time_offset); L_CLR(time_freq); #ifdef PPS_SYNC pps_tf[0].tv_sec = pps_tf[0].tv_nsec = 0; pps_tf[1].tv_sec = pps_tf[1].tv_nsec = 0; pps_tf[2].tv_sec = pps_tf[2].tv_nsec = 0; pps_fcount = 0; L_CLR(pps_freq); #endif /* PPS_SYNC */ } SYSINIT(ntpclocks, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, ntp_init, NULL); /* * hardupdate() - local clock update * * This routine is called by ntp_adjtime() to update the local clock * phase and frequency. The implementation is of an adaptive-parameter, * hybrid phase/frequency-lock loop (PLL/FLL). The routine computes new * time and frequency offset estimates for each call. If the kernel PPS * discipline code is configured (PPS_SYNC), the PPS signal itself * determines the new time offset, instead of the calling argument. * Presumably, calls to ntp_adjtime() occur only when the caller * believes the local clock is valid within some bound (+-128 ms with * NTP). If the caller's time is far different than the PPS time, an * argument will ensue, and it's not clear who will lose. * * For uncompensated quartz crystal oscillators and nominal update * intervals less than 256 s, operation should be in phase-lock mode, * where the loop is disciplined to phase. For update intervals greater * than 1024 s, operation should be in frequency-lock mode, where the * loop is disciplined to frequency. Between 256 s and 1024 s, the mode * is selected by the STA_MODE status bit. */ static void hardupdate(offset) long offset; /* clock offset (ns) */ { long mtemp; l_fp ftemp; + NTPADJ_ASSERT_LOCKED(); + /* * Select how the phase is to be controlled and from which * source. If the PPS signal is present and enabled to * discipline the time, the PPS offset is used; otherwise, the * argument offset is used. */ if (!(time_status & STA_PLL)) return; if (!(time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)) { if (offset > MAXPHASE) time_monitor = MAXPHASE; else if (offset < -MAXPHASE) time_monitor = -MAXPHASE; else time_monitor = offset; L_LINT(time_offset, time_monitor); } /* * Select how the frequency is to be controlled and in which * mode (PLL or FLL). If the PPS signal is present and enabled * to discipline the frequency, the PPS frequency is used; * otherwise, the argument offset is used to compute it. */ if (time_status & STA_PPSFREQ && time_status & STA_PPSSIGNAL) { time_reftime = time_uptime; return; } if (time_status & STA_FREQHOLD || time_reftime == 0) time_reftime = time_uptime; mtemp = time_uptime - time_reftime; L_LINT(ftemp, time_monitor); L_RSHIFT(ftemp, (SHIFT_PLL + 2 + time_constant) << 1); L_MPY(ftemp, mtemp); L_ADD(time_freq, ftemp); time_status &= ~STA_MODE; if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { L_LINT(ftemp, (time_monitor << 4) / mtemp); L_RSHIFT(ftemp, SHIFT_FLL + 4); L_ADD(time_freq, ftemp); time_status |= STA_MODE; } time_reftime = time_uptime; if (L_GINT(time_freq) > MAXFREQ) L_LINT(time_freq, MAXFREQ); else if (L_GINT(time_freq) < -MAXFREQ) L_LINT(time_freq, -MAXFREQ); } #ifdef PPS_SYNC /* * hardpps() - discipline CPU clock oscillator to external PPS signal * * This routine is called at each PPS interrupt in order to discipline * the CPU clock oscillator to the PPS signal. There are two independent * first-order feedback loops, one for the phase, the other for the * frequency. The phase loop measures and grooms the PPS phase offset * and leaves it in a handy spot for the seconds overflow routine. The * frequency loop averages successive PPS phase differences and * calculates the PPS frequency offset, which is also processed by the * seconds overflow routine. The code requires the caller to capture the * time and architecture-dependent hardware counter values in * nanoseconds at the on-time PPS signal transition. * * Note that, on some Unix systems this routine runs at an interrupt * priority level higher than the timer interrupt routine hardclock(). * Therefore, the variables used are distinct from the hardclock() * variables, except for the actual time and frequency variables, which * are determined by this routine and updated atomically. */ void hardpps(tsp, nsec) struct timespec *tsp; /* time at PPS */ long nsec; /* hardware counter at PPS */ { long u_sec, u_nsec, v_nsec; /* temps */ l_fp ftemp; + NTPADJ_LOCK(); + /* * The signal is first processed by a range gate and frequency * discriminator. The range gate rejects noise spikes outside * the range +-500 us. The frequency discriminator rejects input * signals with apparent frequency outside the range 1 +-500 * PPM. If two hits occur in the same second, we ignore the * later hit; if not and a hit occurs outside the range gate, * keep the later hit for later comparison, but do not process * it. */ time_status |= STA_PPSSIGNAL | STA_PPSJITTER; time_status &= ~(STA_PPSWANDER | STA_PPSERROR); pps_valid = PPS_VALID; u_sec = tsp->tv_sec; u_nsec = tsp->tv_nsec; if (u_nsec >= (NANOSECOND >> 1)) { u_nsec -= NANOSECOND; u_sec++; } v_nsec = u_nsec - pps_tf[0].tv_nsec; - if (u_sec == pps_tf[0].tv_sec && v_nsec < NANOSECOND - - MAXFREQ) - return; + if (u_sec == pps_tf[0].tv_sec && v_nsec < NANOSECOND - MAXFREQ) + goto out; pps_tf[2] = pps_tf[1]; pps_tf[1] = pps_tf[0]; pps_tf[0].tv_sec = u_sec; pps_tf[0].tv_nsec = u_nsec; /* * Compute the difference between the current and previous * counter values. If the difference exceeds 0.5 s, assume it * has wrapped around, so correct 1.0 s. If the result exceeds * the tick interval, the sample point has crossed a tick * boundary during the last second, so correct the tick. Very * intricate. */ u_nsec = nsec; if (u_nsec > (NANOSECOND >> 1)) u_nsec -= NANOSECOND; else if (u_nsec < -(NANOSECOND >> 1)) u_nsec += NANOSECOND; pps_fcount += u_nsec; if (v_nsec > MAXFREQ || v_nsec < -MAXFREQ) - return; + goto out; time_status &= ~STA_PPSJITTER; /* * A three-stage median filter is used to help denoise the PPS * time. The median sample becomes the time offset estimate; the * difference between the other two samples becomes the time * dispersion (jitter) estimate. */ if (pps_tf[0].tv_nsec > pps_tf[1].tv_nsec) { if (pps_tf[1].tv_nsec > pps_tf[2].tv_nsec) { v_nsec = pps_tf[1].tv_nsec; /* 0 1 2 */ u_nsec = pps_tf[0].tv_nsec - pps_tf[2].tv_nsec; } else if (pps_tf[2].tv_nsec > pps_tf[0].tv_nsec) { v_nsec = pps_tf[0].tv_nsec; /* 2 0 1 */ u_nsec = pps_tf[2].tv_nsec - pps_tf[1].tv_nsec; } else { v_nsec = pps_tf[2].tv_nsec; /* 0 2 1 */ u_nsec = pps_tf[0].tv_nsec - pps_tf[1].tv_nsec; } } else { if (pps_tf[1].tv_nsec < pps_tf[2].tv_nsec) { v_nsec = pps_tf[1].tv_nsec; /* 2 1 0 */ u_nsec = pps_tf[2].tv_nsec - pps_tf[0].tv_nsec; } else if (pps_tf[2].tv_nsec < pps_tf[0].tv_nsec) { v_nsec = pps_tf[0].tv_nsec; /* 1 0 2 */ u_nsec = pps_tf[1].tv_nsec - pps_tf[2].tv_nsec; } else { v_nsec = pps_tf[2].tv_nsec; /* 1 2 0 */ u_nsec = pps_tf[1].tv_nsec - pps_tf[0].tv_nsec; } } /* * Nominal jitter is due to PPS signal noise and interrupt * latency. If it exceeds the popcorn threshold, the sample is * discarded. otherwise, if so enabled, the time offset is * updated. We can tolerate a modest loss of data here without * much degrading time accuracy. * * The measurements being checked here were made with the system * timecounter, so the popcorn threshold is not allowed to fall below * the number of nanoseconds in two ticks of the timecounter. For a * timecounter running faster than 1 GHz the lower bound is 2ns, just * to avoid a nonsensical threshold of zero. */ - if (u_nsec > lmax(pps_jitter << PPS_POPCORN, + if (u_nsec > lmax(pps_jitter << PPS_POPCORN, 2 * (NANOSECOND / (long)qmin(NANOSECOND, tc_getfrequency())))) { time_status |= STA_PPSJITTER; pps_jitcnt++; } else if (time_status & STA_PPSTIME) { time_monitor = -v_nsec; L_LINT(time_offset, time_monitor); } pps_jitter += (u_nsec - pps_jitter) >> PPS_FAVG; u_sec = pps_tf[0].tv_sec - pps_lastsec; if (u_sec < (1 << pps_shift)) - return; + goto out; /* * At the end of the calibration interval the difference between * the first and last counter values becomes the scaled * frequency. It will later be divided by the length of the * interval to determine the frequency update. If the frequency * exceeds a sanity threshold, or if the actual calibration * interval is not equal to the expected length, the data are * discarded. We can tolerate a modest loss of data here without * much degrading frequency accuracy. */ pps_calcnt++; v_nsec = -pps_fcount; pps_lastsec = pps_tf[0].tv_sec; pps_fcount = 0; u_nsec = MAXFREQ << pps_shift; - if (v_nsec > u_nsec || v_nsec < -u_nsec || u_sec != (1 << - pps_shift)) { + if (v_nsec > u_nsec || v_nsec < -u_nsec || u_sec != (1 << pps_shift)) { time_status |= STA_PPSERROR; pps_errcnt++; - return; + goto out; } /* * Here the raw frequency offset and wander (stability) is * calculated. If the wander is less than the wander threshold * for four consecutive averaging intervals, the interval is * doubled; if it is greater than the threshold for four * consecutive intervals, the interval is halved. The scaled * frequency offset is converted to frequency offset. The * stability metric is calculated as the average of recent * frequency changes, but is used only for performance * monitoring. */ L_LINT(ftemp, v_nsec); L_RSHIFT(ftemp, pps_shift); L_SUB(ftemp, pps_freq); u_nsec = L_GINT(ftemp); if (u_nsec > PPS_MAXWANDER) { L_LINT(ftemp, PPS_MAXWANDER); pps_intcnt--; time_status |= STA_PPSWANDER; pps_stbcnt++; } else if (u_nsec < -PPS_MAXWANDER) { L_LINT(ftemp, -PPS_MAXWANDER); pps_intcnt--; time_status |= STA_PPSWANDER; pps_stbcnt++; } else { pps_intcnt++; } if (pps_intcnt >= 4) { pps_intcnt = 4; if (pps_shift < pps_shiftmax) { pps_shift++; pps_intcnt = 0; } } else if (pps_intcnt <= -4 || pps_shift > pps_shiftmax) { pps_intcnt = -4; if (pps_shift > PPS_FAVG) { pps_shift--; pps_intcnt = 0; } } if (u_nsec < 0) u_nsec = -u_nsec; pps_stabil += (u_nsec * SCALE_PPM - pps_stabil) >> PPS_FAVG; /* * The PPS frequency is recalculated and clamped to the maximum * MAXFREQ. If enabled, the system clock frequency is updated as * well. */ L_ADD(pps_freq, ftemp); u_nsec = L_GINT(pps_freq); if (u_nsec > MAXFREQ) L_LINT(pps_freq, MAXFREQ); else if (u_nsec < -MAXFREQ) L_LINT(pps_freq, -MAXFREQ); if (time_status & STA_PPSFREQ) time_freq = pps_freq; + +out: + NTPADJ_UNLOCK(); } #endif /* PPS_SYNC */ #ifndef _SYS_SYSPROTO_H_ struct adjtime_args { struct timeval *delta; struct timeval *olddelta; }; #endif /* ARGSUSED */ int sys_adjtime(struct thread *td, struct adjtime_args *uap) { struct timeval delta, olddelta, *deltap; int error; if (uap->delta) { error = copyin(uap->delta, &delta, sizeof(delta)); if (error) return (error); deltap = δ } else deltap = NULL; error = kern_adjtime(td, deltap, &olddelta); if (uap->olddelta && error == 0) error = copyout(&olddelta, uap->olddelta, sizeof(olddelta)); return (error); } int kern_adjtime(struct thread *td, struct timeval *delta, struct timeval *olddelta) { struct timeval atv; + int64_t ltr, ltw; int error; - mtx_lock(&Giant); - if (olddelta) { - atv.tv_sec = time_adjtime / 1000000; - atv.tv_usec = time_adjtime % 1000000; + if (delta != NULL) { + error = priv_check(td, PRIV_ADJTIME); + if (error != 0) + return (error); + ltw = (int64_t)delta->tv_sec * 1000000 + delta->tv_usec; + } + NTPADJ_LOCK(); + ltr = time_adjtime; + if (delta != NULL) + time_adjtime = ltw; + NTPADJ_UNLOCK(); + if (olddelta != NULL) { + atv.tv_sec = ltr / 1000000; + atv.tv_usec = ltr % 1000000; if (atv.tv_usec < 0) { atv.tv_usec += 1000000; atv.tv_sec--; } *olddelta = atv; } - if (delta) { - if ((error = priv_check(td, PRIV_ADJTIME))) { - mtx_unlock(&Giant); - return (error); - } - time_adjtime = (int64_t)delta->tv_sec * 1000000 + - delta->tv_usec; - } - mtx_unlock(&Giant); return (0); } static struct callout resettodr_callout; static int resettodr_period = 1800; static void periodic_resettodr(void *arg __unused) { - if (!ntp_is_time_error()) { - mtx_lock(&Giant); + /* + * Read of time_status is lock-less, which is fine since + * ntp_is_time_error() operates on the consistent read value. + */ + if (!ntp_is_time_error(time_status)) resettodr(); - mtx_unlock(&Giant); - } if (resettodr_period > 0) callout_schedule(&resettodr_callout, resettodr_period * hz); } static void shutdown_resettodr(void *arg __unused, int howto __unused) { callout_drain(&resettodr_callout); - if (resettodr_period > 0 && !ntp_is_time_error()) { - mtx_lock(&Giant); + /* Another unlocked read of time_status */ + if (resettodr_period > 0 && !ntp_is_time_error(time_status)) resettodr(); - mtx_unlock(&Giant); - } } static int sysctl_resettodr_period(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (error || !req->newptr) return (error); if (cold) goto done; if (resettodr_period == 0) callout_stop(&resettodr_callout); else callout_reset(&resettodr_callout, resettodr_period * hz, periodic_resettodr, NULL); done: return (0); } -SYSCTL_PROC(_machdep, OID_AUTO, rtc_save_period, CTLTYPE_INT|CTLFLAG_RWTUN, - &resettodr_period, 1800, sysctl_resettodr_period, "I", - "Save system time to RTC with this period (in seconds)"); +SYSCTL_PROC(_machdep, OID_AUTO, rtc_save_period, CTLTYPE_INT | CTLFLAG_RWTUN | + CTLFLAG_MPSAFE, &resettodr_period, 1800, sysctl_resettodr_period, "I", + "Save system time to RTC with this period (in seconds)"); static void start_periodic_resettodr(void *arg __unused) { EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_resettodr, NULL, SHUTDOWN_PRI_FIRST); callout_init(&resettodr_callout, 1); if (resettodr_period == 0) return; callout_reset(&resettodr_callout, resettodr_period * hz, periodic_resettodr, NULL); } SYSINIT(periodic_resettodr, SI_SUB_LAST, SI_ORDER_MIDDLE, start_periodic_resettodr, NULL); Index: projects/vnet/sys/kern/kern_sig.c =================================================================== --- projects/vnet/sys/kern/kern_sig.c (revision 302276) +++ projects/vnet/sys/kern/kern_sig.c (revision 302277) @@ -1,3621 +1,3635 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sig.c 8.7 (Berkeley) 4/18/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_gzio.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ONSIG 32 /* NSIG for osig* syscalls. XXX. */ SDT_PROVIDER_DECLARE(proc); SDT_PROBE_DEFINE3(proc, , , signal__send, "struct thread *", "struct proc *", "int"); SDT_PROBE_DEFINE2(proc, , , signal__clear, "int", "ksiginfo_t *"); SDT_PROBE_DEFINE3(proc, , , signal__discard, "struct thread *", "struct proc *", "int"); static int coredump(struct thread *); static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi); static int issignal(struct thread *td); static int sigprop(int sig); static void tdsigwakeup(struct thread *, int, sig_t, int); -static void sig_suspend_threads(struct thread *, struct proc *, int); +static int sig_suspend_threads(struct thread *, struct proc *, int); static int filt_sigattach(struct knote *kn); static void filt_sigdetach(struct knote *kn); static int filt_signal(struct knote *kn, long hint); static struct thread *sigtd(struct proc *p, int sig, int prop); static void sigqueue_start(void); static uma_zone_t ksiginfo_zone = NULL; struct filterops sig_filtops = { .f_isfd = 0, .f_attach = filt_sigattach, .f_detach = filt_sigdetach, .f_event = filt_signal, }; static int kern_logsigexit = 1; SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, &kern_logsigexit, 0, "Log processes quitting on abnormal signals to syslog(3)"); static int kern_forcesigexit = 1; SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW, &kern_forcesigexit, 0, "Force trap signal to be handled"); static SYSCTL_NODE(_kern, OID_AUTO, sigqueue, CTLFLAG_RW, 0, "POSIX real time signal"); static int max_pending_per_proc = 128; SYSCTL_INT(_kern_sigqueue, OID_AUTO, max_pending_per_proc, CTLFLAG_RW, &max_pending_per_proc, 0, "Max pending signals per proc"); static int preallocate_siginfo = 1024; SYSCTL_INT(_kern_sigqueue, OID_AUTO, preallocate, CTLFLAG_RDTUN, &preallocate_siginfo, 0, "Preallocated signal memory size"); static int signal_overflow = 0; SYSCTL_INT(_kern_sigqueue, OID_AUTO, overflow, CTLFLAG_RD, &signal_overflow, 0, "Number of signals overflew"); static int signal_alloc_fail = 0; SYSCTL_INT(_kern_sigqueue, OID_AUTO, alloc_fail, CTLFLAG_RD, &signal_alloc_fail, 0, "signals failed to be allocated"); SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL); /* * Policy -- Can ucred cr1 send SIGIO to process cr2? * Should use cr_cansignal() once cr_cansignal() allows SIGIO and SIGURG * in the right situations. */ #define CANSIGIO(cr1, cr2) \ ((cr1)->cr_uid == 0 || \ (cr1)->cr_ruid == (cr2)->cr_ruid || \ (cr1)->cr_uid == (cr2)->cr_ruid || \ (cr1)->cr_ruid == (cr2)->cr_uid || \ (cr1)->cr_uid == (cr2)->cr_uid) static int sugid_coredump; SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN, &sugid_coredump, 0, "Allow setuid and setgid processes to dump core"); static int capmode_coredump; SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, &capmode_coredump, 0, "Allow processes in capability mode to dump core"); static int do_coredump = 1; SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW, &do_coredump, 0, "Enable/Disable coredumps"); static int set_core_nodump_flag = 0; SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, 0, "Enable setting the NODUMP flag on coredump files"); static int coredump_devctl = 0; SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl, 0, "Generate a devctl notification when processes coredump"); /* * Signal properties and actions. * The array below categorizes the signals and their default actions * according to the following properties: */ #define SA_KILL 0x01 /* terminates process by default */ #define SA_CORE 0x02 /* ditto and coredumps */ #define SA_STOP 0x04 /* suspend process */ #define SA_TTYSTOP 0x08 /* ditto, from tty */ #define SA_IGNORE 0x10 /* ignore by default */ #define SA_CONT 0x20 /* continue if suspended */ #define SA_CANTMASK 0x40 /* non-maskable, catchable */ static int sigproptbl[NSIG] = { SA_KILL, /* SIGHUP */ SA_KILL, /* SIGINT */ SA_KILL|SA_CORE, /* SIGQUIT */ SA_KILL|SA_CORE, /* SIGILL */ SA_KILL|SA_CORE, /* SIGTRAP */ SA_KILL|SA_CORE, /* SIGABRT */ SA_KILL|SA_CORE, /* SIGEMT */ SA_KILL|SA_CORE, /* SIGFPE */ SA_KILL, /* SIGKILL */ SA_KILL|SA_CORE, /* SIGBUS */ SA_KILL|SA_CORE, /* SIGSEGV */ SA_KILL|SA_CORE, /* SIGSYS */ SA_KILL, /* SIGPIPE */ SA_KILL, /* SIGALRM */ SA_KILL, /* SIGTERM */ SA_IGNORE, /* SIGURG */ SA_STOP, /* SIGSTOP */ SA_STOP|SA_TTYSTOP, /* SIGTSTP */ SA_IGNORE|SA_CONT, /* SIGCONT */ SA_IGNORE, /* SIGCHLD */ SA_STOP|SA_TTYSTOP, /* SIGTTIN */ SA_STOP|SA_TTYSTOP, /* SIGTTOU */ SA_IGNORE, /* SIGIO */ SA_KILL, /* SIGXCPU */ SA_KILL, /* SIGXFSZ */ SA_KILL, /* SIGVTALRM */ SA_KILL, /* SIGPROF */ SA_IGNORE, /* SIGWINCH */ SA_IGNORE, /* SIGINFO */ SA_KILL, /* SIGUSR1 */ SA_KILL, /* SIGUSR2 */ }; static void reschedule_signals(struct proc *p, sigset_t block, int flags); static void sigqueue_start(void) { ksiginfo_zone = uma_zcreate("ksiginfo", sizeof(ksiginfo_t), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_prealloc(ksiginfo_zone, preallocate_siginfo); p31b_setcfg(CTL_P1003_1B_REALTIME_SIGNALS, _POSIX_REALTIME_SIGNALS); p31b_setcfg(CTL_P1003_1B_RTSIG_MAX, SIGRTMAX - SIGRTMIN + 1); p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc); } ksiginfo_t * ksiginfo_alloc(int wait) { int flags; flags = M_ZERO; if (! wait) flags |= M_NOWAIT; if (ksiginfo_zone != NULL) return ((ksiginfo_t *)uma_zalloc(ksiginfo_zone, flags)); return (NULL); } void ksiginfo_free(ksiginfo_t *ksi) { uma_zfree(ksiginfo_zone, ksi); } static __inline int ksiginfo_tryfree(ksiginfo_t *ksi) { if (!(ksi->ksi_flags & KSI_EXT)) { uma_zfree(ksiginfo_zone, ksi); return (1); } return (0); } void sigqueue_init(sigqueue_t *list, struct proc *p) { SIGEMPTYSET(list->sq_signals); SIGEMPTYSET(list->sq_kill); TAILQ_INIT(&list->sq_list); list->sq_proc = p; list->sq_flags = SQ_INIT; } /* * Get a signal's ksiginfo. * Return: * 0 - signal not found * others - signal number */ static int sigqueue_get(sigqueue_t *sq, int signo, ksiginfo_t *si) { struct proc *p = sq->sq_proc; struct ksiginfo *ksi, *next; int count = 0; KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited")); if (!SIGISMEMBER(sq->sq_signals, signo)) return (0); if (SIGISMEMBER(sq->sq_kill, signo)) { count++; SIGDELSET(sq->sq_kill, signo); } TAILQ_FOREACH_SAFE(ksi, &sq->sq_list, ksi_link, next) { if (ksi->ksi_signo == signo) { if (count == 0) { TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; ksiginfo_copy(ksi, si); if (ksiginfo_tryfree(ksi) && p != NULL) p->p_pendingcnt--; } if (++count > 1) break; } } if (count <= 1) SIGDELSET(sq->sq_signals, signo); si->ksi_signo = signo; return (signo); } void sigqueue_take(ksiginfo_t *ksi) { struct ksiginfo *kp; struct proc *p; sigqueue_t *sq; if (ksi == NULL || (sq = ksi->ksi_sigq) == NULL) return; p = sq->sq_proc; TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; if (!(ksi->ksi_flags & KSI_EXT) && p != NULL) p->p_pendingcnt--; for (kp = TAILQ_FIRST(&sq->sq_list); kp != NULL; kp = TAILQ_NEXT(kp, ksi_link)) { if (kp->ksi_signo == ksi->ksi_signo) break; } if (kp == NULL && !SIGISMEMBER(sq->sq_kill, ksi->ksi_signo)) SIGDELSET(sq->sq_signals, ksi->ksi_signo); } static int sigqueue_add(sigqueue_t *sq, int signo, ksiginfo_t *si) { struct proc *p = sq->sq_proc; struct ksiginfo *ksi; int ret = 0; KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited")); if (signo == SIGKILL || signo == SIGSTOP || si == NULL) { SIGADDSET(sq->sq_kill, signo); goto out_set_bit; } /* directly insert the ksi, don't copy it */ if (si->ksi_flags & KSI_INS) { if (si->ksi_flags & KSI_HEAD) TAILQ_INSERT_HEAD(&sq->sq_list, si, ksi_link); else TAILQ_INSERT_TAIL(&sq->sq_list, si, ksi_link); si->ksi_sigq = sq; goto out_set_bit; } if (__predict_false(ksiginfo_zone == NULL)) { SIGADDSET(sq->sq_kill, signo); goto out_set_bit; } if (p != NULL && p->p_pendingcnt >= max_pending_per_proc) { signal_overflow++; ret = EAGAIN; } else if ((ksi = ksiginfo_alloc(0)) == NULL) { signal_alloc_fail++; ret = EAGAIN; } else { if (p != NULL) p->p_pendingcnt++; ksiginfo_copy(si, ksi); ksi->ksi_signo = signo; if (si->ksi_flags & KSI_HEAD) TAILQ_INSERT_HEAD(&sq->sq_list, ksi, ksi_link); else TAILQ_INSERT_TAIL(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = sq; } if ((si->ksi_flags & KSI_TRAP) != 0 || (si->ksi_flags & KSI_SIGQ) == 0) { if (ret != 0) SIGADDSET(sq->sq_kill, signo); ret = 0; goto out_set_bit; } if (ret != 0) return (ret); out_set_bit: SIGADDSET(sq->sq_signals, signo); return (ret); } void sigqueue_flush(sigqueue_t *sq) { struct proc *p = sq->sq_proc; ksiginfo_t *ksi; KASSERT(sq->sq_flags & SQ_INIT, ("sigqueue not inited")); if (p != NULL) PROC_LOCK_ASSERT(p, MA_OWNED); while ((ksi = TAILQ_FIRST(&sq->sq_list)) != NULL) { TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; if (ksiginfo_tryfree(ksi) && p != NULL) p->p_pendingcnt--; } SIGEMPTYSET(sq->sq_signals); SIGEMPTYSET(sq->sq_kill); } static void sigqueue_move_set(sigqueue_t *src, sigqueue_t *dst, const sigset_t *set) { sigset_t tmp; struct proc *p1, *p2; ksiginfo_t *ksi, *next; KASSERT(src->sq_flags & SQ_INIT, ("src sigqueue not inited")); KASSERT(dst->sq_flags & SQ_INIT, ("dst sigqueue not inited")); p1 = src->sq_proc; p2 = dst->sq_proc; /* Move siginfo to target list */ TAILQ_FOREACH_SAFE(ksi, &src->sq_list, ksi_link, next) { if (SIGISMEMBER(*set, ksi->ksi_signo)) { TAILQ_REMOVE(&src->sq_list, ksi, ksi_link); if (p1 != NULL) p1->p_pendingcnt--; TAILQ_INSERT_TAIL(&dst->sq_list, ksi, ksi_link); ksi->ksi_sigq = dst; if (p2 != NULL) p2->p_pendingcnt++; } } /* Move pending bits to target list */ tmp = src->sq_kill; SIGSETAND(tmp, *set); SIGSETOR(dst->sq_kill, tmp); SIGSETNAND(src->sq_kill, tmp); tmp = src->sq_signals; SIGSETAND(tmp, *set); SIGSETOR(dst->sq_signals, tmp); SIGSETNAND(src->sq_signals, tmp); } #if 0 static void sigqueue_move(sigqueue_t *src, sigqueue_t *dst, int signo) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, signo); sigqueue_move_set(src, dst, &set); } #endif static void sigqueue_delete_set(sigqueue_t *sq, const sigset_t *set) { struct proc *p = sq->sq_proc; ksiginfo_t *ksi, *next; KASSERT(sq->sq_flags & SQ_INIT, ("src sigqueue not inited")); /* Remove siginfo queue */ TAILQ_FOREACH_SAFE(ksi, &sq->sq_list, ksi_link, next) { if (SIGISMEMBER(*set, ksi->ksi_signo)) { TAILQ_REMOVE(&sq->sq_list, ksi, ksi_link); ksi->ksi_sigq = NULL; if (ksiginfo_tryfree(ksi) && p != NULL) p->p_pendingcnt--; } } SIGSETNAND(sq->sq_kill, *set); SIGSETNAND(sq->sq_signals, *set); } void sigqueue_delete(sigqueue_t *sq, int signo) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, signo); sigqueue_delete_set(sq, &set); } /* Remove a set of signals for a process */ static void sigqueue_delete_set_proc(struct proc *p, const sigset_t *set) { sigqueue_t worklist; struct thread *td0; PROC_LOCK_ASSERT(p, MA_OWNED); sigqueue_init(&worklist, NULL); sigqueue_move_set(&p->p_sigqueue, &worklist, set); FOREACH_THREAD_IN_PROC(p, td0) sigqueue_move_set(&td0->td_sigqueue, &worklist, set); sigqueue_flush(&worklist); } void sigqueue_delete_proc(struct proc *p, int signo) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, signo); sigqueue_delete_set_proc(p, &set); } static void sigqueue_delete_stopmask_proc(struct proc *p) { sigset_t set; SIGEMPTYSET(set); SIGADDSET(set, SIGSTOP); SIGADDSET(set, SIGTSTP); SIGADDSET(set, SIGTTIN); SIGADDSET(set, SIGTTOU); sigqueue_delete_set_proc(p, &set); } /* * Determine signal that should be delivered to thread td, the current * thread, 0 if none. If there is a pending stop signal with default * action, the process stops in issignal(). */ int cursig(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); mtx_assert(&td->td_proc->p_sigacts->ps_mtx, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_NOTOWNED); return (SIGPENDING(td) ? issignal(td) : 0); } /* * Arrange for ast() to handle unmasked pending signals on return to user * mode. This must be called whenever a signal is added to td_sigqueue or * unmasked in td_sigmask. */ void signotify(struct thread *td) { struct proc *p; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); if (SIGPENDING(td)) { thread_lock(td); td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING; thread_unlock(td); } } int sigonstack(size_t sp) { struct thread *td = curthread; return ((td->td_pflags & TDP_ALTSTACK) ? #if defined(COMPAT_43) ((td->td_sigstk.ss_size == 0) ? (td->td_sigstk.ss_flags & SS_ONSTACK) : ((sp - (size_t)td->td_sigstk.ss_sp) < td->td_sigstk.ss_size)) #else ((sp - (size_t)td->td_sigstk.ss_sp) < td->td_sigstk.ss_size) #endif : 0); } static __inline int sigprop(int sig) { if (sig > 0 && sig < NSIG) return (sigproptbl[_SIG_IDX(sig)]); return (0); } int sig_ffs(sigset_t *set) { int i; for (i = 0; i < _SIG_WORDS; i++) if (set->__bits[i]) return (ffs(set->__bits[i]) + (i * 32)); return (0); } static bool sigact_flag_test(const struct sigaction *act, int flag) { /* * SA_SIGINFO is reset when signal disposition is set to * ignore or default. Other flags are kept according to user * settings. */ return ((act->sa_flags & flag) != 0 && (flag != SA_SIGINFO || ((__sighandler_t *)act->sa_sigaction != SIG_IGN && (__sighandler_t *)act->sa_sigaction != SIG_DFL))); } /* * kern_sigaction * sigaction * freebsd4_sigaction * osigaction */ int kern_sigaction(struct thread *td, int sig, const struct sigaction *act, struct sigaction *oact, int flags) { struct sigacts *ps; struct proc *p = td->td_proc; if (!_SIG_VALID(sig)) return (EINVAL); if (act != NULL && act->sa_handler != SIG_DFL && act->sa_handler != SIG_IGN && (act->sa_flags & ~(SA_ONSTACK | SA_RESTART | SA_RESETHAND | SA_NOCLDSTOP | SA_NODEFER | SA_NOCLDWAIT | SA_SIGINFO)) != 0) return (EINVAL); PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); if (oact) { oact->sa_mask = ps->ps_catchmask[_SIG_IDX(sig)]; oact->sa_flags = 0; if (SIGISMEMBER(ps->ps_sigonstack, sig)) oact->sa_flags |= SA_ONSTACK; if (!SIGISMEMBER(ps->ps_sigintr, sig)) oact->sa_flags |= SA_RESTART; if (SIGISMEMBER(ps->ps_sigreset, sig)) oact->sa_flags |= SA_RESETHAND; if (SIGISMEMBER(ps->ps_signodefer, sig)) oact->sa_flags |= SA_NODEFER; if (SIGISMEMBER(ps->ps_siginfo, sig)) { oact->sa_flags |= SA_SIGINFO; oact->sa_sigaction = (__siginfohandler_t *)ps->ps_sigact[_SIG_IDX(sig)]; } else oact->sa_handler = ps->ps_sigact[_SIG_IDX(sig)]; if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDSTOP) oact->sa_flags |= SA_NOCLDSTOP; if (sig == SIGCHLD && ps->ps_flag & PS_NOCLDWAIT) oact->sa_flags |= SA_NOCLDWAIT; } if (act) { if ((sig == SIGKILL || sig == SIGSTOP) && act->sa_handler != SIG_DFL) { mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); return (EINVAL); } /* * Change setting atomically. */ ps->ps_catchmask[_SIG_IDX(sig)] = act->sa_mask; SIG_CANTMASK(ps->ps_catchmask[_SIG_IDX(sig)]); if (sigact_flag_test(act, SA_SIGINFO)) { ps->ps_sigact[_SIG_IDX(sig)] = (__sighandler_t *)act->sa_sigaction; SIGADDSET(ps->ps_siginfo, sig); } else { ps->ps_sigact[_SIG_IDX(sig)] = act->sa_handler; SIGDELSET(ps->ps_siginfo, sig); } if (!sigact_flag_test(act, SA_RESTART)) SIGADDSET(ps->ps_sigintr, sig); else SIGDELSET(ps->ps_sigintr, sig); if (sigact_flag_test(act, SA_ONSTACK)) SIGADDSET(ps->ps_sigonstack, sig); else SIGDELSET(ps->ps_sigonstack, sig); if (sigact_flag_test(act, SA_RESETHAND)) SIGADDSET(ps->ps_sigreset, sig); else SIGDELSET(ps->ps_sigreset, sig); if (sigact_flag_test(act, SA_NODEFER)) SIGADDSET(ps->ps_signodefer, sig); else SIGDELSET(ps->ps_signodefer, sig); if (sig == SIGCHLD) { if (act->sa_flags & SA_NOCLDSTOP) ps->ps_flag |= PS_NOCLDSTOP; else ps->ps_flag &= ~PS_NOCLDSTOP; if (act->sa_flags & SA_NOCLDWAIT) { /* * Paranoia: since SA_NOCLDWAIT is implemented * by reparenting the dying child to PID 1 (and * trust it to reap the zombie), PID 1 itself * is forbidden to set SA_NOCLDWAIT. */ if (p->p_pid == 1) ps->ps_flag &= ~PS_NOCLDWAIT; else ps->ps_flag |= PS_NOCLDWAIT; } else ps->ps_flag &= ~PS_NOCLDWAIT; if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN) ps->ps_flag |= PS_CLDSIGIGN; else ps->ps_flag &= ~PS_CLDSIGIGN; } /* * Set bit in ps_sigignore for signals that are set to SIG_IGN, * and for signals set to SIG_DFL where the default is to * ignore. However, don't put SIGCONT in ps_sigignore, as we * have to restart the process. */ if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN || (sigprop(sig) & SA_IGNORE && ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL)) { /* never to be seen again */ sigqueue_delete_proc(p, sig); if (sig != SIGCONT) /* easier in psignal */ SIGADDSET(ps->ps_sigignore, sig); SIGDELSET(ps->ps_sigcatch, sig); } else { SIGDELSET(ps->ps_sigignore, sig); if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL) SIGDELSET(ps->ps_sigcatch, sig); else SIGADDSET(ps->ps_sigcatch, sig); } #ifdef COMPAT_FREEBSD4 if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN || ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL || (flags & KSA_FREEBSD4) == 0) SIGDELSET(ps->ps_freebsd4, sig); else SIGADDSET(ps->ps_freebsd4, sig); #endif #ifdef COMPAT_43 if (ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN || ps->ps_sigact[_SIG_IDX(sig)] == SIG_DFL || (flags & KSA_OSIGSET) == 0) SIGDELSET(ps->ps_osigset, sig); else SIGADDSET(ps->ps_osigset, sig); #endif } mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); return (0); } #ifndef _SYS_SYSPROTO_H_ struct sigaction_args { int sig; struct sigaction *act; struct sigaction *oact; }; #endif int sys_sigaction(td, uap) struct thread *td; register struct sigaction_args *uap; { struct sigaction act, oact; register struct sigaction *actp, *oactp; int error; actp = (uap->act != NULL) ? &act : NULL; oactp = (uap->oact != NULL) ? &oact : NULL; if (actp) { error = copyin(uap->act, actp, sizeof(act)); if (error) return (error); } error = kern_sigaction(td, uap->sig, actp, oactp, 0); if (oactp && !error) error = copyout(oactp, uap->oact, sizeof(oact)); return (error); } #ifdef COMPAT_FREEBSD4 #ifndef _SYS_SYSPROTO_H_ struct freebsd4_sigaction_args { int sig; struct sigaction *act; struct sigaction *oact; }; #endif int freebsd4_sigaction(td, uap) struct thread *td; register struct freebsd4_sigaction_args *uap; { struct sigaction act, oact; register struct sigaction *actp, *oactp; int error; actp = (uap->act != NULL) ? &act : NULL; oactp = (uap->oact != NULL) ? &oact : NULL; if (actp) { error = copyin(uap->act, actp, sizeof(act)); if (error) return (error); } error = kern_sigaction(td, uap->sig, actp, oactp, KSA_FREEBSD4); if (oactp && !error) error = copyout(oactp, uap->oact, sizeof(oact)); return (error); } #endif /* COMAPT_FREEBSD4 */ #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ #ifndef _SYS_SYSPROTO_H_ struct osigaction_args { int signum; struct osigaction *nsa; struct osigaction *osa; }; #endif int osigaction(td, uap) struct thread *td; register struct osigaction_args *uap; { struct osigaction sa; struct sigaction nsa, osa; register struct sigaction *nsap, *osap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); nsap = (uap->nsa != NULL) ? &nsa : NULL; osap = (uap->osa != NULL) ? &osa : NULL; if (nsap) { error = copyin(uap->nsa, &sa, sizeof(sa)); if (error) return (error); nsap->sa_handler = sa.sa_handler; nsap->sa_flags = sa.sa_flags; OSIG2SIG(sa.sa_mask, nsap->sa_mask); } error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET); if (osap && !error) { sa.sa_handler = osap->sa_handler; sa.sa_flags = osap->sa_flags; SIG2OSIG(osap->sa_mask, sa.sa_mask); error = copyout(&sa, uap->osa, sizeof(sa)); } return (error); } #if !defined(__i386__) /* Avoid replicating the same stub everywhere */ int osigreturn(td, uap) struct thread *td; struct osigreturn_args *uap; { return (nosys(td, (struct nosys_args *)uap)); } #endif #endif /* COMPAT_43 */ /* * Initialize signal state for process 0; * set to ignore signals that are ignored by default. */ void siginit(p) struct proc *p; { register int i; struct sigacts *ps; PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); for (i = 1; i <= NSIG; i++) { if (sigprop(i) & SA_IGNORE && i != SIGCONT) { SIGADDSET(ps->ps_sigignore, i); } } mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); } /* * Reset specified signal to the default disposition. */ static void sigdflt(struct sigacts *ps, int sig) { mtx_assert(&ps->ps_mtx, MA_OWNED); SIGDELSET(ps->ps_sigcatch, sig); if ((sigprop(sig) & SA_IGNORE) != 0 && sig != SIGCONT) SIGADDSET(ps->ps_sigignore, sig); ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL; SIGDELSET(ps->ps_siginfo, sig); } /* * Reset signals for an exec of the specified process. */ void execsigs(struct proc *p) { sigset_t osigignore; struct sigacts *ps; int sig; struct thread *td; /* * Reset caught signals. Held signals remain held * through td_sigmask (unless they were caught, * and are now ignored by default). */ PROC_LOCK_ASSERT(p, MA_OWNED); td = FIRST_THREAD_IN_PROC(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); while (SIGNOTEMPTY(ps->ps_sigcatch)) { sig = sig_ffs(&ps->ps_sigcatch); sigdflt(ps, sig); if ((sigprop(sig) & SA_IGNORE) != 0) sigqueue_delete_proc(p, sig); } /* * As CloudABI processes cannot modify signal handlers, fully * reset all signals to their default behavior. Do ignore * SIGPIPE, as it would otherwise be impossible to recover from * writes to broken pipes and sockets. */ if (SV_PROC_ABI(p) == SV_ABI_CLOUDABI) { osigignore = ps->ps_sigignore; while (SIGNOTEMPTY(osigignore)) { sig = sig_ffs(&osigignore); SIGDELSET(osigignore, sig); if (sig != SIGPIPE) sigdflt(ps, sig); } SIGADDSET(ps->ps_sigignore, SIGPIPE); } /* * Reset stack state to the user stack. * Clear set of signals caught on the signal stack. */ td->td_sigstk.ss_flags = SS_DISABLE; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_sp = 0; td->td_pflags &= ~TDP_ALTSTACK; /* * Reset no zombies if child dies flag as Solaris does. */ ps->ps_flag &= ~(PS_NOCLDWAIT | PS_CLDSIGIGN); if (ps->ps_sigact[_SIG_IDX(SIGCHLD)] == SIG_IGN) ps->ps_sigact[_SIG_IDX(SIGCHLD)] = SIG_DFL; mtx_unlock(&ps->ps_mtx); } /* * kern_sigprocmask() * * Manipulate signal mask. */ int kern_sigprocmask(struct thread *td, int how, sigset_t *set, sigset_t *oset, int flags) { sigset_t new_block, oset1; struct proc *p; int error; p = td->td_proc; if ((flags & SIGPROCMASK_PROC_LOCKED) != 0) PROC_LOCK_ASSERT(p, MA_OWNED); else PROC_LOCK(p); mtx_assert(&p->p_sigacts->ps_mtx, (flags & SIGPROCMASK_PS_LOCKED) != 0 ? MA_OWNED : MA_NOTOWNED); if (oset != NULL) *oset = td->td_sigmask; error = 0; if (set != NULL) { switch (how) { case SIG_BLOCK: SIG_CANTMASK(*set); oset1 = td->td_sigmask; SIGSETOR(td->td_sigmask, *set); new_block = td->td_sigmask; SIGSETNAND(new_block, oset1); break; case SIG_UNBLOCK: SIGSETNAND(td->td_sigmask, *set); signotify(td); goto out; case SIG_SETMASK: SIG_CANTMASK(*set); oset1 = td->td_sigmask; if (flags & SIGPROCMASK_OLD) SIGSETLO(td->td_sigmask, *set); else td->td_sigmask = *set; new_block = td->td_sigmask; SIGSETNAND(new_block, oset1); signotify(td); break; default: error = EINVAL; goto out; } /* * The new_block set contains signals that were not previously * blocked, but are blocked now. * * In case we block any signal that was not previously blocked * for td, and process has the signal pending, try to schedule * signal delivery to some thread that does not block the * signal, possibly waking it up. */ if (p->p_numthreads != 1) reschedule_signals(p, new_block, flags); } out: if (!(flags & SIGPROCMASK_PROC_LOCKED)) PROC_UNLOCK(p); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sigprocmask_args { int how; const sigset_t *set; sigset_t *oset; }; #endif int sys_sigprocmask(td, uap) register struct thread *td; struct sigprocmask_args *uap; { sigset_t set, oset; sigset_t *setp, *osetp; int error; setp = (uap->set != NULL) ? &set : NULL; osetp = (uap->oset != NULL) ? &oset : NULL; if (setp) { error = copyin(uap->set, setp, sizeof(set)); if (error) return (error); } error = kern_sigprocmask(td, uap->how, setp, osetp, 0); if (osetp && !error) { error = copyout(osetp, uap->oset, sizeof(oset)); } return (error); } #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ #ifndef _SYS_SYSPROTO_H_ struct osigprocmask_args { int how; osigset_t mask; }; #endif int osigprocmask(td, uap) register struct thread *td; struct osigprocmask_args *uap; { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, 1); SIG2OSIG(oset, td->td_retval[0]); return (error); } #endif /* COMPAT_43 */ int sys_sigwait(struct thread *td, struct sigwait_args *uap) { ksiginfo_t ksi; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) { td->td_retval[0] = error; return (0); } error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) { if (error == EINTR && td->td_proc->p_osrel < P_OSREL_SIGWAIT) error = ERESTART; if (error == ERESTART) return (error); td->td_retval[0] = error; return (0); } error = copyout(&ksi.ksi_signo, uap->sig, sizeof(ksi.ksi_signo)); td->td_retval[0] = error; return (0); } int sys_sigtimedwait(struct thread *td, struct sigtimedwait_args *uap) { struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) return (error); timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t)); if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int sys_sigwaitinfo(struct thread *td, struct sigwaitinfo_args *uap) { ksiginfo_t ksi; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) error = copyout(&ksi.ksi_info, uap->info, sizeof(siginfo_t)); if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int kern_sigtimedwait(struct thread *td, sigset_t waitset, ksiginfo_t *ksi, struct timespec *timeout) { struct sigacts *ps; sigset_t saved_mask, new_block; struct proc *p; int error, sig, timo, timevalid = 0; struct timespec rts, ets, ts; struct timeval tv; p = td->td_proc; error = 0; ets.tv_sec = 0; ets.tv_nsec = 0; if (timeout != NULL) { if (timeout->tv_nsec >= 0 && timeout->tv_nsec < 1000000000) { timevalid = 1; getnanouptime(&rts); ets = rts; timespecadd(&ets, timeout); } } ksiginfo_init(ksi); /* Some signals can not be waited for. */ SIG_CANTMASK(waitset); ps = p->p_sigacts; PROC_LOCK(p); saved_mask = td->td_sigmask; SIGSETNAND(td->td_sigmask, waitset); for (;;) { mtx_lock(&ps->ps_mtx); sig = cursig(td); mtx_unlock(&ps->ps_mtx); if (sig != 0 && SIGISMEMBER(waitset, sig)) { if (sigqueue_get(&td->td_sigqueue, sig, ksi) != 0 || sigqueue_get(&p->p_sigqueue, sig, ksi) != 0) { error = 0; break; } } if (error != 0) break; /* * POSIX says this must be checked after looking for pending * signals. */ if (timeout != NULL) { if (!timevalid) { error = EINVAL; break; } getnanouptime(&rts); if (timespeccmp(&rts, &ets, >=)) { error = EAGAIN; break; } ts = ets; timespecsub(&ts, &rts); TIMESPEC_TO_TIMEVAL(&tv, &ts); timo = tvtohz(&tv); } else { timo = 0; } error = msleep(ps, &p->p_mtx, PPAUSE|PCATCH, "sigwait", timo); if (timeout != NULL) { if (error == ERESTART) { /* Timeout can not be restarted. */ error = EINTR; } else if (error == EAGAIN) { /* We will calculate timeout by ourself. */ error = 0; } } } new_block = saved_mask; SIGSETNAND(new_block, td->td_sigmask); td->td_sigmask = saved_mask; /* * Fewer signals can be delivered to us, reschedule signal * notification. */ if (p->p_numthreads != 1) reschedule_signals(p, new_block, 0); if (error == 0) { SDT_PROBE2(proc, , , signal__clear, sig, ksi); if (ksi->ksi_code == SI_TIMER) itimer_accept(p, ksi->ksi_timerid, ksi); #ifdef KTRACE if (KTRPOINT(td, KTR_PSIG)) { sig_t action; mtx_lock(&ps->ps_mtx); action = ps->ps_sigact[_SIG_IDX(sig)]; mtx_unlock(&ps->ps_mtx); ktrpsig(sig, action, &td->td_sigmask, ksi->ksi_code); } #endif if (sig == SIGKILL) sigexit(td, sig); } PROC_UNLOCK(p); return (error); } #ifndef _SYS_SYSPROTO_H_ struct sigpending_args { sigset_t *set; }; #endif int sys_sigpending(td, uap) struct thread *td; struct sigpending_args *uap; { struct proc *p = td->td_proc; sigset_t pending; PROC_LOCK(p); pending = p->p_sigqueue.sq_signals; SIGSETOR(pending, td->td_sigqueue.sq_signals); PROC_UNLOCK(p); return (copyout(&pending, uap->set, sizeof(sigset_t))); } #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ #ifndef _SYS_SYSPROTO_H_ struct osigpending_args { int dummy; }; #endif int osigpending(td, uap) struct thread *td; struct osigpending_args *uap; { struct proc *p = td->td_proc; sigset_t pending; PROC_LOCK(p); pending = p->p_sigqueue.sq_signals; SIGSETOR(pending, td->td_sigqueue.sq_signals); PROC_UNLOCK(p); SIG2OSIG(pending, td->td_retval[0]); return (0); } #endif /* COMPAT_43 */ #if defined(COMPAT_43) /* * Generalized interface signal handler, 4.3-compatible. */ #ifndef _SYS_SYSPROTO_H_ struct osigvec_args { int signum; struct sigvec *nsv; struct sigvec *osv; }; #endif /* ARGSUSED */ int osigvec(td, uap) struct thread *td; register struct osigvec_args *uap; { struct sigvec vec; struct sigaction nsa, osa; register struct sigaction *nsap, *osap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); nsap = (uap->nsv != NULL) ? &nsa : NULL; osap = (uap->osv != NULL) ? &osa : NULL; if (nsap) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); nsap->sa_handler = vec.sv_handler; OSIG2SIG(vec.sv_mask, nsap->sa_mask); nsap->sa_flags = vec.sv_flags; nsap->sa_flags ^= SA_RESTART; /* opposite of SV_INTERRUPT */ } error = kern_sigaction(td, uap->signum, nsap, osap, KSA_OSIGSET); if (osap && !error) { vec.sv_handler = osap->sa_handler; SIG2OSIG(osap->sa_mask, vec.sv_mask); vec.sv_flags = osap->sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } #ifndef _SYS_SYSPROTO_H_ struct osigblock_args { int mask; }; #endif int osigblock(td, uap) register struct thread *td; struct osigblock_args *uap; { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } #ifndef _SYS_SYSPROTO_H_ struct osigsetmask_args { int mask; }; #endif int osigsetmask(td, uap) struct thread *td; struct osigsetmask_args *uap; { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } #endif /* COMPAT_43 */ /* * Suspend calling thread until signal, providing mask to be set in the * meantime. */ #ifndef _SYS_SYSPROTO_H_ struct sigsuspend_args { const sigset_t *sigmask; }; #endif /* ARGSUSED */ int sys_sigsuspend(td, uap) struct thread *td; struct sigsuspend_args *uap; { sigset_t mask; int error; error = copyin(uap->sigmask, &mask, sizeof(mask)); if (error) return (error); return (kern_sigsuspend(td, mask)); } int kern_sigsuspend(struct thread *td, sigset_t mask) { struct proc *p = td->td_proc; int has_sig, sig; /* * When returning from sigsuspend, we want * the old mask to be restored after the * signal handler has finished. Thus, we * save it here and mark the sigacts structure * to indicate this. */ PROC_LOCK(p); kern_sigprocmask(td, SIG_SETMASK, &mask, &td->td_oldsigmask, SIGPROCMASK_PROC_LOCKED); td->td_pflags |= TDP_OLDMASK; /* * Process signals now. Otherwise, we can get spurious wakeup * due to signal entered process queue, but delivered to other * thread. But sigsuspend should return only on signal * delivery. */ (p->p_sysent->sv_set_syscall_retval)(td, EINTR); for (has_sig = 0; !has_sig;) { while (msleep(&p->p_sigacts, &p->p_mtx, PPAUSE|PCATCH, "pause", 0) == 0) /* void */; thread_suspend_check(0); mtx_lock(&p->p_sigacts->ps_mtx); while ((sig = cursig(td)) != 0) has_sig += postsig(sig); mtx_unlock(&p->p_sigacts->ps_mtx); } PROC_UNLOCK(p); td->td_errno = EINTR; td->td_pflags |= TDP_NERRNO; return (EJUSTRETURN); } #ifdef COMPAT_43 /* XXX - COMPAT_FBSD3 */ /* * Compatibility sigsuspend call for old binaries. Note nonstandard calling * convention: libc stub passes mask, not pointer, to save a copyin. */ #ifndef _SYS_SYSPROTO_H_ struct osigsuspend_args { osigset_t mask; }; #endif /* ARGSUSED */ int osigsuspend(td, uap) struct thread *td; struct osigsuspend_args *uap; { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } #endif /* COMPAT_43 */ #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct osigstack_args { struct sigstack *nss; struct sigstack *oss; }; #endif /* ARGSUSED */ int osigstack(td, uap) struct thread *td; register struct osigstack_args *uap; { struct sigstack nss, oss; int error = 0; if (uap->nss != NULL) { error = copyin(uap->nss, &nss, sizeof(nss)); if (error) return (error); } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (uap->nss != NULL) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= nss.ss_onstack & SS_ONSTACK; td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) error = copyout(&oss, uap->oss, sizeof(oss)); return (error); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct sigaltstack_args { stack_t *ss; stack_t *oss; }; #endif /* ARGSUSED */ int sys_sigaltstack(td, uap) struct thread *td; register struct sigaltstack_args *uap; { stack_t ss, oss; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &ss, sizeof(ss)); if (error) return (error); } error = kern_sigaltstack(td, (uap->ss != NULL) ? &ss : NULL, (uap->oss != NULL) ? &oss : NULL); if (error) return (error); if (uap->oss != NULL) error = copyout(&oss, uap->oss, sizeof(stack_t)); return (error); } int kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss) { struct proc *p = td->td_proc; int oonstack; oonstack = sigonstack(cpu_getstack(td)); if (oss != NULL) { *oss = td->td_sigstk; oss->ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; } if (ss != NULL) { if (oonstack) return (EPERM); if ((ss->ss_flags & ~SS_DISABLE) != 0) return (EINVAL); if (!(ss->ss_flags & SS_DISABLE)) { if (ss->ss_size < p->p_sysent->sv_minsigstksz) return (ENOMEM); td->td_sigstk = *ss; td->td_pflags |= TDP_ALTSTACK; } else { td->td_pflags &= ~TDP_ALTSTACK; } } return (0); } /* * Common code for kill process group/broadcast kill. * cp is calling process. */ static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi) { struct proc *p; struct pgrp *pgrp; int err; int ret; ret = ESRCH; if (all) { /* * broadcast */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || p == td->td_proc || p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } err = p_cansignal(td, p, sig); if (err == 0) { if (sig) pksignal(p, sig, ksi); ret = err; } else if (ret == ESRCH) ret = err; PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); } else { sx_slock(&proctree_lock); if (pgid == 0) { /* * zero pgid means send to my process group. */ pgrp = td->td_proc->p_pgrp; PGRP_LOCK(pgrp); } else { pgrp = pgfind(pgid); if (pgrp == NULL) { sx_sunlock(&proctree_lock); return (ESRCH); } } sx_sunlock(&proctree_lock); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || p->p_state == PRS_NEW) { PROC_UNLOCK(p); continue; } err = p_cansignal(td, p, sig); if (err == 0) { if (sig) pksignal(p, sig, ksi); ret = err; } else if (ret == ESRCH) ret = err; PROC_UNLOCK(p); } PGRP_UNLOCK(pgrp); } return (ret); } #ifndef _SYS_SYSPROTO_H_ struct kill_args { int pid; int signum; }; #endif /* ARGSUSED */ int sys_kill(struct thread *td, struct kill_args *uap) { ksiginfo_t ksi; struct proc *p; int error; /* * A process in capability mode can send signals only to himself. * The main rationale behind this is that abort(3) is implemented as * kill(getpid(), SIGABRT). */ if (IN_CAPABILITY_MODE(td) && uap->pid != td->td_proc->p_pid) return (ECAPMODE); AUDIT_ARG_SIGNUM(uap->signum); AUDIT_ARG_PID(uap->pid); if ((u_int)uap->signum > _SIG_MAXSIG) return (EINVAL); ksiginfo_init(&ksi); ksi.ksi_signo = uap->signum; ksi.ksi_code = SI_USER; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; if (uap->pid > 0) { /* kill single process */ if ((p = pfind(uap->pid)) == NULL) { if ((p = zpfind(uap->pid)) == NULL) return (ESRCH); } AUDIT_ARG_PROCESS(p); error = p_cansignal(td, p, uap->signum); if (error == 0 && uap->signum) pksignal(p, uap->signum, &ksi); PROC_UNLOCK(p); return (error); } switch (uap->pid) { case -1: /* broadcast signal */ return (killpg1(td, uap->signum, 0, 1, &ksi)); case 0: /* signal own process group */ return (killpg1(td, uap->signum, 0, 0, &ksi)); default: /* negative explicit process group */ return (killpg1(td, uap->signum, -uap->pid, 0, &ksi)); } /* NOTREACHED */ } int sys_pdkill(td, uap) struct thread *td; struct pdkill_args *uap; { struct proc *p; cap_rights_t rights; int error; AUDIT_ARG_SIGNUM(uap->signum); AUDIT_ARG_FD(uap->fd); if ((u_int)uap->signum > _SIG_MAXSIG) return (EINVAL); error = procdesc_find(td, uap->fd, cap_rights_init(&rights, CAP_PDKILL), &p); if (error) return (error); AUDIT_ARG_PROCESS(p); error = p_cansignal(td, p, uap->signum); if (error == 0 && uap->signum) kern_psignal(p, uap->signum); PROC_UNLOCK(p); return (error); } #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct okillpg_args { int pgid; int signum; }; #endif /* ARGSUSED */ int okillpg(struct thread *td, struct okillpg_args *uap) { ksiginfo_t ksi; AUDIT_ARG_SIGNUM(uap->signum); AUDIT_ARG_PID(uap->pgid); if ((u_int)uap->signum > _SIG_MAXSIG) return (EINVAL); ksiginfo_init(&ksi); ksi.ksi_signo = uap->signum; ksi.ksi_code = SI_USER; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; return (killpg1(td, uap->signum, uap->pgid, 0, &ksi)); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct sigqueue_args { pid_t pid; int signum; /* union sigval */ void *value; }; #endif int sys_sigqueue(struct thread *td, struct sigqueue_args *uap) { ksiginfo_t ksi; struct proc *p; int error; if ((u_int)uap->signum > _SIG_MAXSIG) return (EINVAL); /* * Specification says sigqueue can only send signal to * single process. */ if (uap->pid <= 0) return (EINVAL); if ((p = pfind(uap->pid)) == NULL) { if ((p = zpfind(uap->pid)) == NULL) return (ESRCH); } error = p_cansignal(td, p, uap->signum); if (error == 0 && uap->signum != 0) { ksiginfo_init(&ksi); ksi.ksi_flags = KSI_SIGQ; ksi.ksi_signo = uap->signum; ksi.ksi_code = SI_QUEUE; ksi.ksi_pid = td->td_proc->p_pid; ksi.ksi_uid = td->td_ucred->cr_ruid; ksi.ksi_value.sival_ptr = uap->value; error = pksignal(p, ksi.ksi_signo, &ksi); } PROC_UNLOCK(p); return (error); } /* * Send a signal to a process group. */ void gsignal(int pgid, int sig, ksiginfo_t *ksi) { struct pgrp *pgrp; if (pgid != 0) { sx_slock(&proctree_lock); pgrp = pgfind(pgid); sx_sunlock(&proctree_lock); if (pgrp != NULL) { pgsignal(pgrp, sig, 0, ksi); PGRP_UNLOCK(pgrp); } } } /* * Send a signal to a process group. If checktty is 1, * limit to members which have a controlling terminal. */ void pgsignal(struct pgrp *pgrp, int sig, int checkctty, ksiginfo_t *ksi) { struct proc *p; if (pgrp) { PGRP_LOCK_ASSERT(pgrp, MA_OWNED); LIST_FOREACH(p, &pgrp->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && (checkctty == 0 || p->p_flag & P_CONTROLT)) pksignal(p, sig, ksi); PROC_UNLOCK(p); } } } /* * Recalculate the signal mask and reset the signal disposition after * usermode frame for delivery is formed. Should be called after * mach-specific routine, because sysent->sv_sendsig() needs correct * ps_siginfo and signal mask. */ static void postsig_done(int sig, struct thread *td, struct sigacts *ps) { sigset_t mask; mtx_assert(&ps->ps_mtx, MA_OWNED); td->td_ru.ru_nsignals++; mask = ps->ps_catchmask[_SIG_IDX(sig)]; if (!SIGISMEMBER(ps->ps_signodefer, sig)) SIGADDSET(mask, sig); kern_sigprocmask(td, SIG_BLOCK, &mask, NULL, SIGPROCMASK_PROC_LOCKED | SIGPROCMASK_PS_LOCKED); if (SIGISMEMBER(ps->ps_sigreset, sig)) sigdflt(ps, sig); } /* * Send a signal caused by a trap to the current thread. If it will be * caught immediately, deliver it with correct code. Otherwise, post it * normally. */ void trapsignal(struct thread *td, ksiginfo_t *ksi) { struct sigacts *ps; struct proc *p; int sig; int code; p = td->td_proc; sig = ksi->ksi_signo; code = ksi->ksi_code; KASSERT(_SIG_VALID(sig), ("invalid signal")); PROC_LOCK(p); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); if ((p->p_flag & P_TRACED) == 0 && SIGISMEMBER(ps->ps_sigcatch, sig) && !SIGISMEMBER(td->td_sigmask, sig)) { #ifdef KTRACE if (KTRPOINT(curthread, KTR_PSIG)) ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)], &td->td_sigmask, code); #endif (*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)], ksi, &td->td_sigmask); postsig_done(sig, td, ps); mtx_unlock(&ps->ps_mtx); } else { /* * Avoid a possible infinite loop if the thread * masking the signal or process is ignoring the * signal. */ if (kern_forcesigexit && (SIGISMEMBER(td->td_sigmask, sig) || ps->ps_sigact[_SIG_IDX(sig)] == SIG_IGN)) { SIGDELSET(td->td_sigmask, sig); SIGDELSET(ps->ps_sigcatch, sig); SIGDELSET(ps->ps_sigignore, sig); ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL; } mtx_unlock(&ps->ps_mtx); p->p_code = code; /* XXX for core dump/debugger */ p->p_sig = sig; /* XXX to verify code */ tdsendsignal(p, td, sig, ksi); } PROC_UNLOCK(p); } static struct thread * sigtd(struct proc *p, int sig, int prop) { struct thread *td, *signal_td; PROC_LOCK_ASSERT(p, MA_OWNED); /* * Check if current thread can handle the signal without * switching context to another thread. */ if (curproc == p && !SIGISMEMBER(curthread->td_sigmask, sig)) return (curthread); signal_td = NULL; FOREACH_THREAD_IN_PROC(p, td) { if (!SIGISMEMBER(td->td_sigmask, sig)) { signal_td = td; break; } } if (signal_td == NULL) signal_td = FIRST_THREAD_IN_PROC(p); return (signal_td); } /* * Send the signal to the process. If the signal has an action, the action * is usually performed by the target process rather than the caller; we add * the signal to the set of pending signals for the process. * * Exceptions: * o When a stop signal is sent to a sleeping process that takes the * default action, the process is stopped without awakening it. * o SIGCONT restarts stopped processes (or puts them back to sleep) * regardless of the signal action (eg, blocked or ignored). * * Other ignored signals are discarded immediately. * * NB: This function may be entered from the debugger via the "kill" DDB * command. There is little that can be done to mitigate the possibly messy * side effects of this unwise possibility. */ void kern_psignal(struct proc *p, int sig) { ksiginfo_t ksi; ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; (void) tdsendsignal(p, NULL, sig, &ksi); } int pksignal(struct proc *p, int sig, ksiginfo_t *ksi) { return (tdsendsignal(p, NULL, sig, ksi)); } /* Utility function for finding a thread to send signal event to. */ int sigev_findtd(struct proc *p ,struct sigevent *sigev, struct thread **ttd) { struct thread *td; if (sigev->sigev_notify == SIGEV_THREAD_ID) { td = tdfind(sigev->sigev_notify_thread_id, p->p_pid); if (td == NULL) return (ESRCH); *ttd = td; } else { *ttd = NULL; PROC_LOCK(p); } return (0); } void tdsignal(struct thread *td, int sig) { ksiginfo_t ksi; ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; (void) tdsendsignal(td->td_proc, td, sig, &ksi); } void tdksignal(struct thread *td, int sig, ksiginfo_t *ksi) { (void) tdsendsignal(td->td_proc, td, sig, ksi); } int tdsendsignal(struct proc *p, struct thread *td, int sig, ksiginfo_t *ksi) { sig_t action; sigqueue_t *sigqueue; int prop; struct sigacts *ps; int intrval; int ret = 0; int wakeup_swapper; MPASS(td == NULL || p == td->td_proc); PROC_LOCK_ASSERT(p, MA_OWNED); if (!_SIG_VALID(sig)) panic("%s(): invalid signal %d", __func__, sig); KASSERT(ksi == NULL || !KSI_ONQ(ksi), ("%s: ksi on queue", __func__)); /* * IEEE Std 1003.1-2001: return success when killing a zombie. */ if (p->p_state == PRS_ZOMBIE) { if (ksi && (ksi->ksi_flags & KSI_INS)) ksiginfo_tryfree(ksi); return (ret); } ps = p->p_sigacts; - KNOTE_LOCKED(&p->p_klist, NOTE_SIGNAL | sig); + KNOTE_LOCKED(p->p_klist, NOTE_SIGNAL | sig); prop = sigprop(sig); if (td == NULL) { td = sigtd(p, sig, prop); sigqueue = &p->p_sigqueue; } else sigqueue = &td->td_sigqueue; SDT_PROBE3(proc, , , signal__send, td, p, sig); /* * If the signal is being ignored, * then we forget about it immediately. * (Note: we don't set SIGCONT in ps_sigignore, * and if it is set to SIG_IGN, * action will be SIG_DFL here.) */ mtx_lock(&ps->ps_mtx); if (SIGISMEMBER(ps->ps_sigignore, sig)) { SDT_PROBE3(proc, , , signal__discard, td, p, sig); mtx_unlock(&ps->ps_mtx); if (ksi && (ksi->ksi_flags & KSI_INS)) ksiginfo_tryfree(ksi); return (ret); } if (SIGISMEMBER(td->td_sigmask, sig)) action = SIG_HOLD; else if (SIGISMEMBER(ps->ps_sigcatch, sig)) action = SIG_CATCH; else action = SIG_DFL; if (SIGISMEMBER(ps->ps_sigintr, sig)) intrval = EINTR; else intrval = ERESTART; mtx_unlock(&ps->ps_mtx); if (prop & SA_CONT) sigqueue_delete_stopmask_proc(p); else if (prop & SA_STOP) { /* * If sending a tty stop signal to a member of an orphaned * process group, discard the signal here if the action * is default; don't stop the process below if sleeping, * and don't clear any pending SIGCONT. */ if ((prop & SA_TTYSTOP) && (p->p_pgrp->pg_jobc == 0) && (action == SIG_DFL)) { if (ksi && (ksi->ksi_flags & KSI_INS)) ksiginfo_tryfree(ksi); return (ret); } sigqueue_delete_proc(p, SIGCONT); if (p->p_flag & P_CONTINUED) { p->p_flag &= ~P_CONTINUED; PROC_LOCK(p->p_pptr); sigqueue_take(p->p_ksi); PROC_UNLOCK(p->p_pptr); } } ret = sigqueue_add(sigqueue, sig, ksi); if (ret != 0) return (ret); signotify(td); /* * Defer further processing for signals which are held, * except that stopped processes must be continued by SIGCONT. */ if (action == SIG_HOLD && !((prop & SA_CONT) && (p->p_flag & P_STOPPED_SIG))) return (ret); /* * SIGKILL: Remove procfs STOPEVENTs. */ if (sig == SIGKILL) { /* from procfs_ioctl.c: PIOCBIC */ p->p_stops = 0; /* from procfs_ioctl.c: PIOCCONT */ p->p_step = 0; wakeup(&p->p_step); } /* * Some signals have a process-wide effect and a per-thread * component. Most processing occurs when the process next * tries to cross the user boundary, however there are some * times when processing needs to be done immediately, such as * waking up threads so that they can cross the user boundary. * We try to do the per-process part here. */ if (P_SHOULDSTOP(p)) { KASSERT(!(p->p_flag & P_WEXIT), ("signal to stopped but exiting process")); if (sig == SIGKILL) { /* * If traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * SIGKILL sets process running. * It will die elsewhere. * All threads must be restarted. */ p->p_flag &= ~P_STOPPED_SIG; goto runfast; } if (prop & SA_CONT) { /* * If traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * If SIGCONT is default (or ignored), we continue the * process but don't leave the signal in sigqueue as * it has no further action. If SIGCONT is held, we * continue the process and leave the signal in * sigqueue. If the process catches SIGCONT, let it * handle the signal itself. If it isn't waiting on * an event, it goes back to run state. * Otherwise, process goes back to sleep state. */ p->p_flag &= ~P_STOPPED_SIG; PROC_SLOCK(p); if (p->p_numthreads == p->p_suspcount) { PROC_SUNLOCK(p); p->p_flag |= P_CONTINUED; p->p_xsig = SIGCONT; PROC_LOCK(p->p_pptr); childproc_continued(p); PROC_UNLOCK(p->p_pptr); PROC_SLOCK(p); } if (action == SIG_DFL) { thread_unsuspend(p); PROC_SUNLOCK(p); sigqueue_delete(sigqueue, sig); goto out; } if (action == SIG_CATCH) { /* * The process wants to catch it so it needs * to run at least one thread, but which one? */ PROC_SUNLOCK(p); goto runfast; } /* * The signal is not ignored or caught. */ thread_unsuspend(p); PROC_SUNLOCK(p); goto out; } if (prop & SA_STOP) { /* * If traced process is already stopped, * then no further action is necessary. */ if (p->p_flag & P_TRACED) goto out; /* * Already stopped, don't need to stop again * (If we did the shell could get confused). * Just make sure the signal STOP bit set. */ p->p_flag |= P_STOPPED_SIG; sigqueue_delete(sigqueue, sig); goto out; } /* * All other kinds of signals: * If a thread is sleeping interruptibly, simulate a * wakeup so that when it is continued it will be made * runnable and can look at the signal. However, don't make * the PROCESS runnable, leave it stopped. * It may run a bit until it hits a thread_suspend_check(). */ wakeup_swapper = 0; PROC_SLOCK(p); thread_lock(td); if (TD_ON_SLEEPQ(td) && (td->td_flags & TDF_SINTR)) wakeup_swapper = sleepq_abort(td, intrval); thread_unlock(td); PROC_SUNLOCK(p); if (wakeup_swapper) kick_proc0(); goto out; /* * Mutexes are short lived. Threads waiting on them will * hit thread_suspend_check() soon. */ } else if (p->p_state == PRS_NORMAL) { if (p->p_flag & P_TRACED || action == SIG_CATCH) { tdsigwakeup(td, sig, action, intrval); goto out; } MPASS(action == SIG_DFL); if (prop & SA_STOP) { if (p->p_flag & (P_PPWAIT|P_WEXIT)) goto out; p->p_flag |= P_STOPPED_SIG; p->p_xsig = sig; PROC_SLOCK(p); - sig_suspend_threads(td, p, 1); + wakeup_swapper = sig_suspend_threads(td, p, 1); if (p->p_numthreads == p->p_suspcount) { /* * only thread sending signal to another * process can reach here, if thread is sending * signal to its process, because thread does * not suspend itself here, p_numthreads * should never be equal to p_suspcount. */ thread_stopped(p); PROC_SUNLOCK(p); sigqueue_delete_proc(p, p->p_xsig); } else PROC_SUNLOCK(p); + if (wakeup_swapper) + kick_proc0(); goto out; } } else { /* Not in "NORMAL" state. discard the signal. */ sigqueue_delete(sigqueue, sig); goto out; } /* * The process is not stopped so we need to apply the signal to all the * running threads. */ runfast: tdsigwakeup(td, sig, action, intrval); PROC_SLOCK(p); thread_unsuspend(p); PROC_SUNLOCK(p); out: /* If we jump here, proc slock should not be owned. */ PROC_SLOCK_ASSERT(p, MA_NOTOWNED); return (ret); } /* * The force of a signal has been directed against a single * thread. We need to see what we can do about knocking it * out of any sleep it may be in etc. */ static void tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval) { struct proc *p = td->td_proc; register int prop; int wakeup_swapper; wakeup_swapper = 0; PROC_LOCK_ASSERT(p, MA_OWNED); prop = sigprop(sig); PROC_SLOCK(p); thread_lock(td); /* * Bring the priority of a thread up if we want it to get * killed in this lifetime. Be careful to avoid bumping the * priority of the idle thread, since we still allow to signal * kernel processes. */ if (action == SIG_DFL && (prop & SA_KILL) != 0 && td->td_priority > PUSER && !TD_IS_IDLETHREAD(td)) sched_prio(td, PUSER); if (TD_ON_SLEEPQ(td)) { /* * If thread is sleeping uninterruptibly * we can't interrupt the sleep... the signal will * be noticed when the process returns through * trap() or syscall(). */ if ((td->td_flags & TDF_SINTR) == 0) goto out; /* * If SIGCONT is default (or ignored) and process is * asleep, we are finished; the process should not * be awakened. */ if ((prop & SA_CONT) && action == SIG_DFL) { thread_unlock(td); PROC_SUNLOCK(p); sigqueue_delete(&p->p_sigqueue, sig); /* * It may be on either list in this state. * Remove from both for now. */ sigqueue_delete(&td->td_sigqueue, sig); return; } /* * Don't awaken a sleeping thread for SIGSTOP if the * STOP signal is deferred. */ - if ((prop & SA_STOP) && (td->td_flags & TDF_SBDRY)) + if ((prop & SA_STOP) != 0 && (td->td_flags & (TDF_SBDRY | + TDF_SERESTART | TDF_SEINTR)) == TDF_SBDRY) goto out; /* * Give low priority threads a better chance to run. */ if (td->td_priority > PUSER && !TD_IS_IDLETHREAD(td)) sched_prio(td, PUSER); wakeup_swapper = sleepq_abort(td, intrval); } else { /* * Other states do nothing with the signal immediately, * other than kicking ourselves if we are running. * It will either never be noticed, or noticed very soon. */ #ifdef SMP if (TD_IS_RUNNING(td) && td != curthread) forward_signal(td); #endif } out: PROC_SUNLOCK(p); thread_unlock(td); if (wakeup_swapper) kick_proc0(); } -static void +static int sig_suspend_threads(struct thread *td, struct proc *p, int sending) { struct thread *td2; + int wakeup_swapper; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); + wakeup_swapper = 0; FOREACH_THREAD_IN_PROC(p, td2) { thread_lock(td2); td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) && (td2->td_flags & TDF_SINTR)) { if (td2->td_flags & TDF_SBDRY) { /* * Once a thread is asleep with - * TDF_SBDRY set, it should never + * TDF_SBDRY and without TDF_SERESTART + * or TDF_SEINTR set, it should never * become suspended due to this check. */ KASSERT(!TD_IS_SUSPENDED(td2), ("thread with deferred stops suspended")); + if ((td2->td_flags & (TDF_SERESTART | + TDF_SEINTR)) != 0 && sending) { + wakeup_swapper |= sleepq_abort(td, + (td2->td_flags & TDF_SERESTART) + != 0 ? ERESTART : EINTR); + } } else if (!TD_IS_SUSPENDED(td2)) { thread_suspend_one(td2); } } else if (!TD_IS_SUSPENDED(td2)) { if (sending || td != td2) td2->td_flags |= TDF_ASTPENDING; #ifdef SMP if (TD_IS_RUNNING(td2) && td2 != td) forward_signal(td2); #endif } thread_unlock(td2); } + return (wakeup_swapper); } int ptracestop(struct thread *td, int sig) { struct proc *p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(!(p->p_flag & P_WEXIT), ("Stopping exiting process")); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, "Stopping for traced signal"); td->td_dbgflags |= TDB_XSIG; td->td_xsig = sig; CTR4(KTR_PTRACE, "ptracestop: tid %d (pid %d) flags %#x sig %d", td->td_tid, p->p_pid, td->td_dbgflags, sig); PROC_SLOCK(p); while ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_XSIG)) { if (p->p_flag & P_SINGLE_EXIT && !(td->td_dbgflags & TDB_EXIT)) { /* * Ignore ptrace stops except for thread exit * events when the process exits. */ td->td_dbgflags &= ~TDB_XSIG; PROC_SUNLOCK(p); return (sig); } /* * Just make wait() to work, the last stopped thread * will win. */ p->p_xsig = sig; p->p_xthread = td; p->p_flag |= (P_STOPPED_SIG|P_STOPPED_TRACE); sig_suspend_threads(td, p, 0); if ((td->td_dbgflags & TDB_STOPATFORK) != 0) { td->td_dbgflags &= ~TDB_STOPATFORK; cv_broadcast(&p->p_dbgwait); } stopme: thread_suspend_switch(td, p); if (p->p_xthread == td) p->p_xthread = NULL; if (!(p->p_flag & P_TRACED)) break; if (td->td_dbgflags & TDB_SUSPEND) { if (p->p_flag & P_SINGLE_EXIT) break; goto stopme; } } PROC_SUNLOCK(p); return (td->td_xsig); } static void reschedule_signals(struct proc *p, sigset_t block, int flags) { struct sigacts *ps; struct thread *td; int sig; PROC_LOCK_ASSERT(p, MA_OWNED); ps = p->p_sigacts; mtx_assert(&ps->ps_mtx, (flags & SIGPROCMASK_PS_LOCKED) != 0 ? MA_OWNED : MA_NOTOWNED); if (SIGISEMPTY(p->p_siglist)) return; SIGSETAND(block, p->p_siglist); while ((sig = sig_ffs(&block)) != 0) { SIGDELSET(block, sig); td = sigtd(p, sig, 0); signotify(td); if (!(flags & SIGPROCMASK_PS_LOCKED)) mtx_lock(&ps->ps_mtx); if (p->p_flag & P_TRACED || SIGISMEMBER(ps->ps_sigcatch, sig)) tdsigwakeup(td, sig, SIG_CATCH, (SIGISMEMBER(ps->ps_sigintr, sig) ? EINTR : ERESTART)); if (!(flags & SIGPROCMASK_PS_LOCKED)) mtx_unlock(&ps->ps_mtx); } } void tdsigcleanup(struct thread *td) { struct proc *p; sigset_t unblocked; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sigqueue_flush(&td->td_sigqueue); if (p->p_numthreads == 1) return; /* * Since we cannot handle signals, notify signal post code * about this by filling the sigmask. * * Also, if needed, wake up thread(s) that do not block the * same signals as the exiting thread, since the thread might * have been selected for delivery and woken up. */ SIGFILLSET(unblocked); SIGSETNAND(unblocked, td->td_sigmask); SIGFILLSET(td->td_sigmask); reschedule_signals(p, unblocked, 0); } static int sigdeferstop_curr_flags(int cflags) { MPASS((cflags & (TDF_SEINTR | TDF_SERESTART)) == 0 || (cflags & TDF_SBDRY) != 0); return (cflags & (TDF_SBDRY | TDF_SEINTR | TDF_SERESTART)); } /* * Defer the delivery of SIGSTOP for the current thread, according to * the requested mode. Returns previous flags, which must be restored * by sigallowstop(). * * TDF_SBDRY, TDF_SEINTR, and TDF_SERESTART flags are only set and * cleared by the current thread, which allow the lock-less read-only * accesses below. */ int sigdeferstop(int mode) { struct thread *td; int cflags, nflags; td = curthread; cflags = sigdeferstop_curr_flags(td->td_flags); switch (mode) { case SIGDEFERSTOP_NOP: nflags = cflags; break; case SIGDEFERSTOP_OFF: nflags = 0; break; case SIGDEFERSTOP_SILENT: nflags = (cflags | TDF_SBDRY) & ~(TDF_SEINTR | TDF_SERESTART); break; case SIGDEFERSTOP_EINTR: nflags = (cflags | TDF_SBDRY | TDF_SEINTR) & ~TDF_SERESTART; break; case SIGDEFERSTOP_ERESTART: nflags = (cflags | TDF_SBDRY | TDF_SERESTART) & ~TDF_SEINTR; break; default: panic("sigdeferstop: invalid mode %x", mode); break; } if (cflags != nflags) { thread_lock(td); td->td_flags = (td->td_flags & ~cflags) | nflags; thread_unlock(td); } return (cflags); } /* * Restores the STOP handling mode, typically permitting the delivery * of SIGSTOP for the current thread. This does not immediately * suspend if a stop was posted. Instead, the thread will suspend * either via ast() or a subsequent interruptible sleep. */ void sigallowstop(int prev) { struct thread *td; int cflags; KASSERT((prev & ~(TDF_SBDRY | TDF_SEINTR | TDF_SERESTART)) == 0, ("sigallowstop: incorrect previous mode %x", prev)); td = curthread; cflags = sigdeferstop_curr_flags(td->td_flags); if (cflags != prev) { thread_lock(td); td->td_flags = (td->td_flags & ~cflags) | prev; thread_unlock(td); } } /* * If the current process has received a signal (should be caught or cause * termination, should interrupt current syscall), return the signal number. * Stop signals with default action are processed immediately, then cleared; * they aren't returned. This is checked after each entry to the system for * a syscall or trap (though this can usually be done without calling issignal * by checking the pending signal masks in cursig.) The normal call * sequence is * * while (sig = cursig(curthread)) * postsig(sig); */ static int issignal(struct thread *td) { struct proc *p; struct sigacts *ps; struct sigqueue *queue; sigset_t sigpending; int sig, prop, newsig; p = td->td_proc; ps = p->p_sigacts; mtx_assert(&ps->ps_mtx, MA_OWNED); PROC_LOCK_ASSERT(p, MA_OWNED); for (;;) { int traced = (p->p_flag & P_TRACED) || (p->p_stops & S_SIG); sigpending = td->td_sigqueue.sq_signals; SIGSETOR(sigpending, p->p_sigqueue.sq_signals); SIGSETNAND(sigpending, td->td_sigmask); - if (p->p_flag & P_PPWAIT || td->td_flags & TDF_SBDRY) + if ((p->p_flag & P_PPWAIT) != 0 || (td->td_flags & + (TDF_SBDRY | TDF_SERESTART | TDF_SEINTR)) == TDF_SBDRY) SIG_STOPSIGMASK(sigpending); if (SIGISEMPTY(sigpending)) /* no signal to send */ return (0); sig = sig_ffs(&sigpending); if (p->p_stops & S_SIG) { mtx_unlock(&ps->ps_mtx); stopevent(p, S_SIG, sig); mtx_lock(&ps->ps_mtx); } /* * We should see pending but ignored signals * only if P_TRACED was on when they were posted. */ if (SIGISMEMBER(ps->ps_sigignore, sig) && (traced == 0)) { sigqueue_delete(&td->td_sigqueue, sig); sigqueue_delete(&p->p_sigqueue, sig); continue; } if (p->p_flag & P_TRACED && (p->p_flag & P_PPTRACE) == 0) { /* * If traced, always stop. * Remove old signal from queue before the stop. * XXX shrug off debugger, it causes siginfo to * be thrown away. */ queue = &td->td_sigqueue; td->td_dbgksi.ksi_signo = 0; if (sigqueue_get(queue, sig, &td->td_dbgksi) == 0) { queue = &p->p_sigqueue; sigqueue_get(queue, sig, &td->td_dbgksi); } mtx_unlock(&ps->ps_mtx); newsig = ptracestop(td, sig); mtx_lock(&ps->ps_mtx); if (sig != newsig) { /* * If parent wants us to take the signal, * then it will leave it in p->p_xsig; * otherwise we just look for signals again. */ if (newsig == 0) continue; sig = newsig; /* * Put the new signal into td_sigqueue. If the * signal is being masked, look for other * signals. */ sigqueue_add(queue, sig, NULL); if (SIGISMEMBER(td->td_sigmask, sig)) continue; signotify(td); } else { if (td->td_dbgksi.ksi_signo != 0) { td->td_dbgksi.ksi_flags |= KSI_HEAD; if (sigqueue_add(&td->td_sigqueue, sig, &td->td_dbgksi) != 0) td->td_dbgksi.ksi_signo = 0; } if (td->td_dbgksi.ksi_signo == 0) sigqueue_add(&td->td_sigqueue, sig, NULL); } /* * If the traced bit got turned off, go back up * to the top to rescan signals. This ensures * that p_sig* and p_sigact are consistent. */ if ((p->p_flag & P_TRACED) == 0) continue; } prop = sigprop(sig); /* * Decide whether the signal should be returned. * Return the signal's number, or fall through * to clear it from the pending mask. */ switch ((intptr_t)p->p_sigacts->ps_sigact[_SIG_IDX(sig)]) { case (intptr_t)SIG_DFL: /* * Don't take default actions on system processes. */ if (p->p_pid <= 1) { #ifdef DIAGNOSTIC /* * Are you sure you want to ignore SIGSEGV * in init? XXX */ printf("Process (pid %lu) got signal %d\n", (u_long)p->p_pid, sig); #endif break; /* == ignore */ } /* * If there is a pending stop signal to process * with default action, stop here, * then clear the signal. However, * if process is member of an orphaned * process group, ignore tty stop signals. */ if (prop & SA_STOP) { if (p->p_flag & (P_TRACED|P_WEXIT) || (p->p_pgrp->pg_jobc == 0 && prop & SA_TTYSTOP)) break; /* == ignore */ mtx_unlock(&ps->ps_mtx); WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, "Catching SIGSTOP"); p->p_flag |= P_STOPPED_SIG; p->p_xsig = sig; PROC_SLOCK(p); sig_suspend_threads(td, p, 0); thread_suspend_switch(td, p); PROC_SUNLOCK(p); mtx_lock(&ps->ps_mtx); break; } else if (prop & SA_IGNORE) { /* * Except for SIGCONT, shouldn't get here. * Default action is to ignore; drop it. */ break; /* == ignore */ } else return (sig); /*NOTREACHED*/ case (intptr_t)SIG_IGN: /* * Masking above should prevent us ever trying * to take action on an ignored signal other * than SIGCONT, unless process is traced. */ if ((prop & SA_CONT) == 0 && (p->p_flag & P_TRACED) == 0) printf("issignal\n"); break; /* == ignore */ default: /* * This signal has an action, let * postsig() process it. */ return (sig); } sigqueue_delete(&td->td_sigqueue, sig); /* take the signal! */ sigqueue_delete(&p->p_sigqueue, sig); } /* NOTREACHED */ } void thread_stopped(struct proc *p) { int n; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_SLOCK_ASSERT(p, MA_OWNED); n = p->p_suspcount; if (p == curproc) n++; if ((p->p_flag & P_STOPPED_SIG) && (n == p->p_numthreads)) { PROC_SUNLOCK(p); p->p_flag &= ~P_WAITED; PROC_LOCK(p->p_pptr); childproc_stopped(p, (p->p_flag & P_TRACED) ? CLD_TRAPPED : CLD_STOPPED); PROC_UNLOCK(p->p_pptr); PROC_SLOCK(p); } } /* * Take the action for the specified signal * from the current set of pending signals. */ int postsig(sig) register int sig; { struct thread *td = curthread; register struct proc *p = td->td_proc; struct sigacts *ps; sig_t action; ksiginfo_t ksi; sigset_t returnmask; KASSERT(sig != 0, ("postsig")); PROC_LOCK_ASSERT(p, MA_OWNED); ps = p->p_sigacts; mtx_assert(&ps->ps_mtx, MA_OWNED); ksiginfo_init(&ksi); if (sigqueue_get(&td->td_sigqueue, sig, &ksi) == 0 && sigqueue_get(&p->p_sigqueue, sig, &ksi) == 0) return (0); ksi.ksi_signo = sig; if (ksi.ksi_code == SI_TIMER) itimer_accept(p, ksi.ksi_timerid, &ksi); action = ps->ps_sigact[_SIG_IDX(sig)]; #ifdef KTRACE if (KTRPOINT(td, KTR_PSIG)) ktrpsig(sig, action, td->td_pflags & TDP_OLDMASK ? &td->td_oldsigmask : &td->td_sigmask, ksi.ksi_code); #endif if (p->p_stops & S_SIG) { mtx_unlock(&ps->ps_mtx); stopevent(p, S_SIG, sig); mtx_lock(&ps->ps_mtx); } if (action == SIG_DFL) { /* * Default action, where the default is to kill * the process. (Other cases were ignored above.) */ mtx_unlock(&ps->ps_mtx); sigexit(td, sig); /* NOTREACHED */ } else { /* * If we get here, the signal must be caught. */ KASSERT(action != SIG_IGN && !SIGISMEMBER(td->td_sigmask, sig), ("postsig action")); /* * Set the new mask value and also defer further * occurrences of this signal. * * Special case: user has done a sigsuspend. Here the * current mask is not of interest, but rather the * mask from before the sigsuspend is what we want * restored after the signal processing is completed. */ if (td->td_pflags & TDP_OLDMASK) { returnmask = td->td_oldsigmask; td->td_pflags &= ~TDP_OLDMASK; } else returnmask = td->td_sigmask; if (p->p_sig == sig) { p->p_code = 0; p->p_sig = 0; } (*p->p_sysent->sv_sendsig)(action, &ksi, &returnmask); postsig_done(sig, td, ps); } return (1); } /* * Kill the current process for stated reason. */ void killproc(p, why) struct proc *p; char *why; { PROC_LOCK_ASSERT(p, MA_OWNED); CTR3(KTR_PROC, "killproc: proc %p (pid %d, %s)", p, p->p_pid, p->p_comm); log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid, p->p_comm, p->p_ucred ? p->p_ucred->cr_uid : -1, why); p->p_flag |= P_WKILLED; kern_psignal(p, SIGKILL); } /* * Force the current process to exit with the specified signal, dumping core * if appropriate. We bypass the normal tests for masked and caught signals, * allowing unrecoverable failures to terminate the process without changing * signal state. Mark the accounting record with the signal termination. * If dumping core, save the signal number for the debugger. Calls exit and * does not return. */ void sigexit(td, sig) struct thread *td; int sig; { struct proc *p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); p->p_acflag |= AXSIG; /* * We must be single-threading to generate a core dump. This * ensures that the registers in the core file are up-to-date. * Also, the ELF dump handler assumes that the thread list doesn't * change out from under it. * * XXX If another thread attempts to single-thread before us * (e.g. via fork()), we won't get a dump at all. */ if ((sigprop(sig) & SA_CORE) && thread_single(p, SINGLE_NO_EXIT) == 0) { p->p_sig = sig; /* * Log signals which would cause core dumps * (Log as LOG_INFO to appease those who don't want * these messages.) * XXX : Todo, as well as euid, write out ruid too * Note that coredump() drops proc lock. */ if (coredump(td) == 0) sig |= WCOREFLAG; if (kern_logsigexit) log(LOG_INFO, "pid %d (%s), uid %d: exited on signal %d%s\n", p->p_pid, p->p_comm, td->td_ucred ? td->td_ucred->cr_uid : -1, sig &~ WCOREFLAG, sig & WCOREFLAG ? " (core dumped)" : ""); } else PROC_UNLOCK(p); exit1(td, 0, sig); /* NOTREACHED */ } /* * Send queued SIGCHLD to parent when child process's state * is changed. */ static void sigparent(struct proc *p, int reason, int status) { PROC_LOCK_ASSERT(p, MA_OWNED); PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED); if (p->p_ksi != NULL) { p->p_ksi->ksi_signo = SIGCHLD; p->p_ksi->ksi_code = reason; p->p_ksi->ksi_status = status; p->p_ksi->ksi_pid = p->p_pid; p->p_ksi->ksi_uid = p->p_ucred->cr_ruid; if (KSI_ONQ(p->p_ksi)) return; } pksignal(p->p_pptr, SIGCHLD, p->p_ksi); } static void childproc_jobstate(struct proc *p, int reason, int sig) { struct sigacts *ps; PROC_LOCK_ASSERT(p, MA_OWNED); PROC_LOCK_ASSERT(p->p_pptr, MA_OWNED); /* * Wake up parent sleeping in kern_wait(), also send * SIGCHLD to parent, but SIGCHLD does not guarantee * that parent will awake, because parent may masked * the signal. */ p->p_pptr->p_flag |= P_STATCHILD; wakeup(p->p_pptr); ps = p->p_pptr->p_sigacts; mtx_lock(&ps->ps_mtx); if ((ps->ps_flag & PS_NOCLDSTOP) == 0) { mtx_unlock(&ps->ps_mtx); sigparent(p, reason, sig); } else mtx_unlock(&ps->ps_mtx); } void childproc_stopped(struct proc *p, int reason) { childproc_jobstate(p, reason, p->p_xsig); } void childproc_continued(struct proc *p) { childproc_jobstate(p, CLD_CONTINUED, SIGCONT); } void childproc_exited(struct proc *p) { int reason, status; if (WCOREDUMP(p->p_xsig)) { reason = CLD_DUMPED; status = WTERMSIG(p->p_xsig); } else if (WIFSIGNALED(p->p_xsig)) { reason = CLD_KILLED; status = WTERMSIG(p->p_xsig); } else { reason = CLD_EXITED; status = p->p_xexit; } /* * XXX avoid calling wakeup(p->p_pptr), the work is * done in exit1(). */ sigparent(p, reason, status); } /* * We only have 1 character for the core count in the format * string, so the range will be 0-9 */ #define MAX_NUM_CORES 10 static int num_cores = 5; static int sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS) { int error; int new_val; new_val = num_cores; error = sysctl_handle_int(oidp, &new_val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (new_val > MAX_NUM_CORES) new_val = MAX_NUM_CORES; if (new_val < 0) new_val = 0; num_cores = new_val; return (0); } SYSCTL_PROC(_debug, OID_AUTO, ncores, CTLTYPE_INT|CTLFLAG_RW, 0, sizeof(int), sysctl_debug_num_cores_check, "I", ""); #define GZ_SUFFIX ".gz" #ifdef GZIO static int compress_user_cores = 1; SYSCTL_INT(_kern, OID_AUTO, compress_user_cores, CTLFLAG_RWTUN, &compress_user_cores, 0, "Compression of user corefiles"); int compress_user_cores_gzlevel = 6; SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_gzlevel, CTLFLAG_RWTUN, &compress_user_cores_gzlevel, 0, "Corefile gzip compression level"); #else static int compress_user_cores = 0; #endif /* * Protect the access to corefilename[] by allproc_lock. */ #define corefilename_lock allproc_lock static char corefilename[MAXPATHLEN] = {"%N.core"}; TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); static int sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) { int error; sx_xlock(&corefilename_lock); error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename), req); sx_xunlock(&corefilename_lock); return (error); } SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", "Process corefile name format string"); /* * corefile_open(comm, uid, pid, td, compress, vpp, namep) * Expand the name described in corefilename, using name, uid, and pid * and open/create core file. * corefilename is a printf-like string, with three format specifiers: * %N name of process ("name") * %P process id (pid) * %U user id (uid) * For example, "%N.core" is the default; they can be disabled completely * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P". * This is controlled by the sysctl variable kern.corefile (see above). */ static int corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td, int compress, struct vnode **vpp, char **namep) { struct nameidata nd; struct sbuf sb; const char *format; char *hostname, *name; int indexpos, i, error, cmode, flags, oflags; hostname = NULL; format = corefilename; name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO); indexpos = -1; (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN); sx_slock(&corefilename_lock); for (i = 0; format[i] != '\0'; i++) { switch (format[i]) { case '%': /* Format character */ i++; switch (format[i]) { case '%': sbuf_putc(&sb, '%'); break; case 'H': /* hostname */ if (hostname == NULL) { hostname = malloc(MAXHOSTNAMELEN, M_TEMP, M_WAITOK); } getcredhostname(td->td_ucred, hostname, MAXHOSTNAMELEN); sbuf_printf(&sb, "%s", hostname); break; case 'I': /* autoincrementing index */ sbuf_printf(&sb, "0"); indexpos = sbuf_len(&sb) - 1; break; case 'N': /* process name */ sbuf_printf(&sb, "%s", comm); break; case 'P': /* process id */ sbuf_printf(&sb, "%u", pid); break; case 'U': /* user id */ sbuf_printf(&sb, "%u", uid); break; default: log(LOG_ERR, "Unknown format character %c in " "corename `%s'\n", format[i], format); break; } break; default: sbuf_putc(&sb, format[i]); break; } } sx_sunlock(&corefilename_lock); free(hostname, M_TEMP); if (compress) sbuf_printf(&sb, GZ_SUFFIX); if (sbuf_error(&sb) != 0) { log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too " "long\n", (long)pid, comm, (u_long)uid); sbuf_delete(&sb); free(name, M_TEMP); return (ENOMEM); } sbuf_finish(&sb); sbuf_delete(&sb); cmode = S_IRUSR | S_IWUSR; oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); /* * If the core format has a %I in it, then we need to check * for existing corefiles before returning a name. * To do this we iterate over 0..num_cores to find a * non-existing core file name to use. */ if (indexpos != -1) { for (i = 0; i < num_cores; i++) { flags = O_CREAT | O_EXCL | FWRITE | O_NOFOLLOW; name[indexpos] = '0' + i; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td); error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, NULL); if (error) { if (error == EEXIST) continue; log(LOG_ERR, "pid %d (%s), uid (%u): Path `%s' failed " "on initial open test, error = %d\n", pid, comm, uid, name, error); } goto out; } } flags = O_CREAT | FWRITE | O_NOFOLLOW; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, td); error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, NULL); out: if (error) { #ifdef AUDIT audit_proc_coredump(td, name, error); #endif free(name, M_TEMP); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); *vpp = nd.ni_vp; *namep = name; return (0); } static int coredump_sanitise_path(const char *path) { size_t i; /* * Only send a subset of ASCII to devd(8) because it * might pass these strings to sh -c. */ for (i = 0; path[i]; i++) if (!(isalpha(path[i]) || isdigit(path[i])) && path[i] != '/' && path[i] != '.' && path[i] != '-') return (0); return (1); } /* * Dump a process' core. The main routine does some * policy checking, and creates the name of the coredump; * then it passes on a vnode and a size limit to the process-specific * coredump routine if there is one; if there _is not_ one, it returns * ENOSYS; otherwise it returns the error from the process-specific routine. */ static int coredump(struct thread *td) { struct proc *p = td->td_proc; struct ucred *cred = td->td_ucred; struct vnode *vp; struct flock lf; struct vattr vattr; int error, error1, locked; char *name; /* name of corefile */ void *rl_cookie; off_t limit; char *data = NULL; char *fullpath, *freepath = NULL; size_t len; static const char comm_name[] = "comm="; static const char core_name[] = "core="; PROC_LOCK_ASSERT(p, MA_OWNED); MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td); _STOPEVENT(p, S_CORE, 0); if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) || (p->p_flag2 & P2_NOTRACE) != 0) { PROC_UNLOCK(p); return (EFAULT); } /* * Note that the bulk of limit checking is done after * the corefile is created. The exception is if the limit * for corefiles is 0, in which case we don't bother * creating the corefile at all. This layout means that * a corefile is truncated instead of not being created, * if it is larger than the limit. */ limit = (off_t)lim_cur(td, RLIMIT_CORE); if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) { PROC_UNLOCK(p); return (EFBIG); } PROC_UNLOCK(p); error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, compress_user_cores, &vp, &name); if (error != 0) return (error); /* * Don't dump to non-regular files or files with links. * Do not dump into system files. */ if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 || vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0) { VOP_UNLOCK(vp, 0); error = EFAULT; goto out; } VOP_UNLOCK(vp, 0); /* Postpone other writers, including core dumps of other processes. */ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_WRLCK; locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0); VATTR_NULL(&vattr); vattr.va_size = 0; if (set_core_nodump_flag) vattr.va_flags = UF_NODUMP; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VOP_SETATTR(vp, &vattr, cred); VOP_UNLOCK(vp, 0); PROC_LOCK(p); p->p_acflag |= ACORE; PROC_UNLOCK(p); if (p->p_sysent->sv_coredump != NULL) { error = p->p_sysent->sv_coredump(td, vp, limit, compress_user_cores ? IMGACT_CORE_COMPRESS : 0); } else { error = ENOSYS; } if (locked) { lf.l_type = F_UNLCK; VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK); } vn_rangelock_unlock(vp, rl_cookie); /* * Notify the userland helper that a process triggered a core dump. * This allows the helper to run an automated debugging session. */ if (error != 0 || coredump_devctl == 0) goto out; len = MAXPATHLEN * 2 + sizeof(comm_name) - 1 + sizeof(' ') + sizeof(core_name) - 1; data = malloc(len, M_TEMP, M_WAITOK); if (vn_fullpath_global(td, p->p_textvp, &fullpath, &freepath) != 0) goto out; if (!coredump_sanitise_path(fullpath)) goto out; snprintf(data, len, "%s%s ", comm_name, fullpath); free(freepath, M_TEMP); freepath = NULL; if (vn_fullpath_global(td, vp, &fullpath, &freepath) != 0) goto out; if (!coredump_sanitise_path(fullpath)) goto out; strlcat(data, core_name, len); strlcat(data, fullpath, len); devctl_notify("kernel", "signal", "coredump", data); out: error1 = vn_close(vp, FWRITE, cred, td); if (error == 0) error = error1; #ifdef AUDIT audit_proc_coredump(td, name, error); #endif free(freepath, M_TEMP); free(data, M_TEMP); free(name, M_TEMP); return (error); } /* * Nonexistent system call-- signal process (may want to handle it). Flag * error in case process won't see signal immediately (blocked or ignored). */ #ifndef _SYS_SYSPROTO_H_ struct nosys_args { int dummy; }; #endif /* ARGSUSED */ int nosys(td, args) struct thread *td; struct nosys_args *args; { struct proc *p = td->td_proc; PROC_LOCK(p); tdsignal(td, SIGSYS); PROC_UNLOCK(p); return (ENOSYS); } /* * Send a SIGIO or SIGURG signal to a process or process group using stored * credentials rather than those of the current process. */ void pgsigio(sigiop, sig, checkctty) struct sigio **sigiop; int sig, checkctty; { ksiginfo_t ksi; struct sigio *sigio; ksiginfo_init(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = SI_KERNEL; SIGIO_LOCK(); sigio = *sigiop; if (sigio == NULL) { SIGIO_UNLOCK(); return; } if (sigio->sio_pgid > 0) { PROC_LOCK(sigio->sio_proc); if (CANSIGIO(sigio->sio_ucred, sigio->sio_proc->p_ucred)) kern_psignal(sigio->sio_proc, sig); PROC_UNLOCK(sigio->sio_proc); } else if (sigio->sio_pgid < 0) { struct proc *p; PGRP_LOCK(sigio->sio_pgrp); LIST_FOREACH(p, &sigio->sio_pgrp->pg_members, p_pglist) { PROC_LOCK(p); if (p->p_state == PRS_NORMAL && CANSIGIO(sigio->sio_ucred, p->p_ucred) && (checkctty == 0 || (p->p_flag & P_CONTROLT))) kern_psignal(p, sig); PROC_UNLOCK(p); } PGRP_UNLOCK(sigio->sio_pgrp); } SIGIO_UNLOCK(); } static int filt_sigattach(struct knote *kn) { struct proc *p = curproc; kn->kn_ptr.p_proc = p; kn->kn_flags |= EV_CLEAR; /* automatically set */ - knlist_add(&p->p_klist, kn, 0); + knlist_add(p->p_klist, kn, 0); return (0); } static void filt_sigdetach(struct knote *kn) { struct proc *p = kn->kn_ptr.p_proc; - knlist_remove(&p->p_klist, kn, 0); + knlist_remove(p->p_klist, kn, 0); } /* * signal knotes are shared with proc knotes, so we apply a mask to * the hint in order to differentiate them from process hints. This * could be avoided by using a signal-specific knote list, but probably * isn't worth the trouble. */ static int filt_signal(struct knote *kn, long hint) { if (hint & NOTE_SIGNAL) { hint &= ~NOTE_SIGNAL; if (kn->kn_id == hint) kn->kn_data++; } return (kn->kn_data != 0); } struct sigacts * sigacts_alloc(void) { struct sigacts *ps; ps = malloc(sizeof(struct sigacts), M_SUBPROC, M_WAITOK | M_ZERO); refcount_init(&ps->ps_refcnt, 1); mtx_init(&ps->ps_mtx, "sigacts", NULL, MTX_DEF); return (ps); } void sigacts_free(struct sigacts *ps) { if (refcount_release(&ps->ps_refcnt) == 0) return; mtx_destroy(&ps->ps_mtx); free(ps, M_SUBPROC); } struct sigacts * sigacts_hold(struct sigacts *ps) { refcount_acquire(&ps->ps_refcnt); return (ps); } void sigacts_copy(struct sigacts *dest, struct sigacts *src) { KASSERT(dest->ps_refcnt == 1, ("sigacts_copy to shared dest")); mtx_lock(&src->ps_mtx); bcopy(src, dest, offsetof(struct sigacts, ps_refcnt)); mtx_unlock(&src->ps_mtx); } int sigacts_shared(struct sigacts *ps) { return (ps->ps_refcnt > 1); } Index: projects/vnet/sys/kern/subr_clock.c =================================================================== --- projects/vnet/sys/kern/subr_clock.c (revision 302276) +++ projects/vnet/sys/kern/subr_clock.c (revision 302277) @@ -1,224 +1,224 @@ /*- * Copyright (c) 1988 University of Utah. * Copyright (c) 1982, 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: clock.c 1.18 91/01/21$ * from: @(#)clock.c 8.2 (Berkeley) 1/12/94 * from: NetBSD: clock_subr.c,v 1.6 2001/07/07 17:04:02 thorpej Exp * and * from: src/sys/i386/isa/clock.c,v 1.176 2001/09/04 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include int tz_minuteswest; int tz_dsttime; /* * The adjkerntz and wall_cmos_clock sysctls are in the "machdep" sysctl * namespace because they were misplaced there originally. */ static int adjkerntz; static int sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { int error; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error && req->newptr) resettodr(); return (error); } -SYSCTL_PROC(_machdep, OID_AUTO, adjkerntz, CTLTYPE_INT|CTLFLAG_RW, - &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", +SYSCTL_PROC(_machdep, OID_AUTO, adjkerntz, CTLTYPE_INT | CTLFLAG_RW | + CTLFLAG_MPSAFE, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "Local offset from UTC in seconds"); static int ct_debug; SYSCTL_INT(_debug, OID_AUTO, clocktime, CTLFLAG_RW, &ct_debug, 0, "Enable printing of clocktime debugging"); static int wall_cmos_clock; SYSCTL_INT(_machdep, OID_AUTO, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, "Enables application of machdep.adjkerntz"); /*--------------------------------------------------------------------* * Generic routines to convert between a POSIX date * (seconds since 1/1/1970) and yr/mo/day/hr/min/sec * Derived from NetBSD arch/hp300/hp300/clock.c */ #define FEBRUARY 2 #define days_in_year(y) (leapyear(y) ? 366 : 365) #define days_in_month(y, m) \ (month_days[(m) - 1] + (m == FEBRUARY ? leapyear(y) : 0)) /* Day of week. Days are counted from 1/1/1970, which was a Thursday */ #define day_of_week(days) (((days) + 4) % 7) static const int month_days[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; /* * This inline avoids some unnecessary modulo operations * as compared with the usual macro: * ( ((year % 4) == 0 && * (year % 100) != 0) || * ((year % 400) == 0) ) * It is otherwise equivalent. */ static int leapyear(int year) { int rv = 0; if ((year & 3) == 0) { rv = 1; if ((year % 100) == 0) { rv = 0; if ((year % 400) == 0) rv = 1; } } return (rv); } static void print_ct(struct clocktime *ct) { printf("[%04d-%02d-%02d %02d:%02d:%02d]", ct->year, ct->mon, ct->day, ct->hour, ct->min, ct->sec); } int clock_ct_to_ts(struct clocktime *ct, struct timespec *ts) { int i, year, days; year = ct->year; if (ct_debug) { printf("ct_to_ts("); print_ct(ct); printf(")"); } /* Sanity checks. */ if (ct->mon < 1 || ct->mon > 12 || ct->day < 1 || ct->day > days_in_month(year, ct->mon) || ct->hour > 23 || ct->min > 59 || ct->sec > 59 || (sizeof(time_t) == 4 && year > 2037)) { /* time_t overflow */ if (ct_debug) printf(" = EINVAL\n"); return (EINVAL); } /* * Compute days since start of time * First from years, then from months. */ days = 0; for (i = POSIX_BASE_YEAR; i < year; i++) days += days_in_year(i); /* Months */ for (i = 1; i < ct->mon; i++) days += days_in_month(year, i); days += (ct->day - 1); ts->tv_sec = (((time_t)days * 24 + ct->hour) * 60 + ct->min) * 60 + ct->sec; ts->tv_nsec = ct->nsec; if (ct_debug) printf(" = %ld.%09ld\n", (long)ts->tv_sec, (long)ts->tv_nsec); return (0); } void clock_ts_to_ct(struct timespec *ts, struct clocktime *ct) { int i, year, days; time_t rsec; /* remainder seconds */ time_t secs; secs = ts->tv_sec; days = secs / SECDAY; rsec = secs % SECDAY; ct->dow = day_of_week(days); /* Subtract out whole years, counting them in i. */ for (year = POSIX_BASE_YEAR; days >= days_in_year(year); year++) days -= days_in_year(year); ct->year = year; /* Subtract out whole months, counting them in i. */ for (i = 1; days >= days_in_month(year, i); i++) days -= days_in_month(year, i); ct->mon = i; /* Days are what is left over (+1) from all that. */ ct->day = days + 1; /* Hours, minutes, seconds are easy */ ct->hour = rsec / 3600; rsec = rsec % 3600; ct->min = rsec / 60; rsec = rsec % 60; ct->sec = rsec; ct->nsec = ts->tv_nsec; if (ct_debug) { printf("ts_to_ct(%ld.%09ld) = ", (long)ts->tv_sec, (long)ts->tv_nsec); print_ct(ct); printf("\n"); } } int utc_offset(void) { return (tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0)); } Index: projects/vnet/sys/kern/subr_rtc.c =================================================================== --- projects/vnet/sys/kern/subr_rtc.c (revision 302276) +++ projects/vnet/sys/kern/subr_rtc.c (revision 302277) @@ -1,178 +1,185 @@ /*- * Copyright (c) 1988 University of Utah. * Copyright (c) 1982, 1990, 1993 * The Regents of the University of California. * Copyright (c) 2011 The FreeBSD Foundation * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Portions of this software were developed by Julien Ridoux at the University * of Melbourne under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah $Hdr: clock.c 1.18 91/01/21$ * from: @(#)clock.c 8.2 (Berkeley) 1/12/94 * from: NetBSD: clock_subr.c,v 1.6 2001/07/07 17:04:02 thorpej Exp * and * from: src/sys/i386/isa/clock.c,v 1.176 2001/09/04 */ /* * Helpers for time-of-day clocks. This is useful for architectures that need * support multiple models of such clocks, and generally serves to make the * code more machine-independent. * If the clock in question can also be used as a time counter, the driver * needs to initiate this. * This code is not yet used by all architectures. */ #include __FBSDID("$FreeBSD$"); #include "opt_ffclock.h" #include #include #include #include #include +#include +#include #include #ifdef FFCLOCK #include #endif #include #include "clock_if.h" static device_t clock_dev = NULL; static long clock_res; static struct timespec clock_adj; +static struct mtx resettodr_lock; +MTX_SYSINIT(resettodr_init, &resettodr_lock, "tod2rl", MTX_DEF); /* XXX: should be kern. now, it's no longer machdep. */ static int disable_rtc_set; SYSCTL_INT(_machdep, OID_AUTO, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, "Disallow adjusting time-of-day clock"); void clock_register(device_t dev, long res) /* res has units of microseconds */ { if (clock_dev != NULL) { if (clock_res <= res) { if (bootverbose) device_printf(dev, "not installed as " "time-of-day clock: clock %s has higher " "resolution\n", device_get_name(clock_dev)); return; } if (bootverbose) device_printf(clock_dev, "removed as " "time-of-day clock: clock %s has higher " "resolution\n", device_get_name(dev)); } clock_dev = dev; clock_res = res; clock_adj.tv_sec = res / 2 / 1000000; clock_adj.tv_nsec = res / 2 % 1000000 * 1000; if (bootverbose) device_printf(dev, "registered as a time-of-day clock " "(resolution %ldus, adjustment %jd.%09jds)\n", res, (intmax_t)clock_adj.tv_sec, (intmax_t)clock_adj.tv_nsec); } /* * inittodr and settodr derived from the i386 versions written * by Christoph Robitschko , reintroduced and * updated by Chris Stenton 8/10/94 */ /* * Initialize the time of day register, based on the time base which is, e.g. * from a filesystem. */ void inittodr(time_t base) { struct timespec ts; int error; if (clock_dev == NULL) { printf("warning: no time-of-day clock registered, system time " "will not be set accurately\n"); goto wrong_time; } /* XXX: We should poll all registered RTCs in case of failure */ error = CLOCK_GETTIME(clock_dev, &ts); if (error != 0 && error != EINVAL) { printf("warning: clock_gettime failed (%d), the system time " "will not be set accurately\n", error); goto wrong_time; } if (error == EINVAL || ts.tv_sec < 0) { printf("Invalid time in real time clock.\n" "Check and reset the date immediately!\n"); goto wrong_time; } ts.tv_sec += utc_offset(); timespecadd(&ts, &clock_adj); tc_setclock(&ts); #ifdef FFCLOCK ffclock_reset_clock(&ts); #endif return; wrong_time: if (base > 0) { ts.tv_sec = base; ts.tv_nsec = 0; tc_setclock(&ts); } } /* * Write system time back to RTC */ void resettodr(void) { struct timespec ts; int error; if (disable_rtc_set || clock_dev == NULL) return; + mtx_lock(&resettodr_lock); getnanotime(&ts); timespecadd(&ts, &clock_adj); ts.tv_sec -= utc_offset(); /* XXX: We should really set all registered RTCs */ - if ((error = CLOCK_SETTIME(clock_dev, &ts)) != 0) + error = CLOCK_SETTIME(clock_dev, &ts); + mtx_unlock(&resettodr_lock); + if (error != 0) printf("warning: clock_settime failed (%d), time-of-day clock " "not adjusted to system time\n", error); } Index: projects/vnet/sys/net/if.c =================================================================== --- projects/vnet/sys/net/if.c (revision 302276) +++ projects/vnet/sys/net/if.c (revision 302277) @@ -1,4148 +1,4151 @@ /*- * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #ifdef INET #include #endif /* INET */ #ifdef INET6 #include #include #endif /* INET6 */ #endif /* INET || INET6 */ #include #ifdef COMPAT_FREEBSD32 #include #include #endif SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); /* Log link state change events */ static int log_link_state_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link state change events"); /* Log promiscuous mode change events */ static int log_promisc_mode_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN, &log_promisc_mode_change, 1, "log promiscuous mode change events"); /* Interface description */ static unsigned int ifdescr_maxlen = 1024; SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, &ifdescr_maxlen, 0, "administrative maximum length for interface description"); static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); /* global sx for non-critical path ifdescr */ static struct sx ifdescr_sx; SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr"); void (*bridge_linkstate_p)(struct ifnet *ifp); void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. */ void (*carp_linkstate_p)(struct ifnet *ifp); void (*carp_demote_adj_p)(int, char *); int (*carp_master_p)(struct ifaddr *); #if defined(INET) || defined(INET6) int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa); int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); int (*carp_attach_p)(struct ifaddr *, int); void (*carp_detach_p)(struct ifaddr *); #endif #ifdef INET int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); #endif #ifdef INET6 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6); caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr); #endif struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; /* * XXX: Style; these should be sorted alphabetically, and unprototyped * static functions should be prototyped. Currently they are sorted by * declaration order. */ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void if_freemulti(struct ifmultiaddr *); static void if_grow(void); static void if_input_default(struct ifnet *, struct mbuf *); static int if_requestencap_default(struct ifnet *, struct if_encap_req *); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); static int if_transmit(struct ifnet *ifp, struct mbuf *m); static void if_unroute(struct ifnet *, int flag, int fam); static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); static void do_link_state_change(void *, int); static int if_getgroup(struct ifgroupreq *, struct ifnet *); static int if_getgroupmembers(struct ifgroupreq *); static void if_delgroups(struct ifnet *); static void if_attach_internal(struct ifnet *, int, struct if_clone *); static int if_detach_internal(struct ifnet *, int, struct if_clone **); #ifdef VIMAGE static void if_vmove(struct ifnet *, struct vnet *); #endif #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif /* ipsec helper hooks */ VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); VNET_DEFINE(int, if_index); int ifqmaxlen = IFQ_MAXLEN; VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); static VNET_DEFINE(int, if_indexlim) = 8; /* Table of ifnet by index. */ VNET_DEFINE(struct ifnet **, ifindex_table); #define V_if_indexlim VNET(if_indexlim) #define V_ifindex_table VNET(ifindex_table) /* * The global network interface list (V_ifnet) and related state (such as * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and * an rwlock. Either may be acquired shared to stablize the list, but both * must be acquired writable to modify the list. This model allows us to * both stablize the interface list during interrupt thread processing, but * also to stablize it over long-running ioctls, without introducing priority * inversions and deadlocks. */ struct rwlock ifnet_rwlock; RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE); struct sx ifnet_sxlock; SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); /* * The allocation of network interfaces is a rather non-atomic affair; we * need to select an index before we are ready to expose the interface for * use, so will use this pointer value to indicate reservation. */ #define IFNET_HOLD (void *)(uintptr_t)(-1) static if_com_alloc_t *if_com_alloc[256]; static if_com_free_t *if_com_free[256]; static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); struct ifnet * ifnet_byindex_locked(u_short idx) { if (idx > V_if_index) return (NULL); if (V_ifindex_table[idx] == IFNET_HOLD) return (NULL); return (V_ifindex_table[idx]); } struct ifnet * ifnet_byindex(u_short idx) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } struct ifnet * ifnet_byindex_ref(u_short idx) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); if (ifp == NULL || (ifp->if_flags & IFF_DYING)) { IFNET_RUNLOCK_NOSLEEP(); return (NULL); } if_ref(ifp); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } /* * Allocate an ifindex array entry; return 0 on success or an error on * failure. */ static u_short ifindex_alloc(void) { u_short idx; IFNET_WLOCK_ASSERT(); retry: /* * Try to find an empty slot below V_if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= V_if_index; idx++) { if (V_ifindex_table[idx] == NULL) break; } /* Catch if_index overflow. */ if (idx >= V_if_indexlim) { if_grow(); goto retry; } if (idx > V_if_index) V_if_index = idx; return (idx); } static void ifindex_free_locked(u_short idx) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = NULL; while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL) V_if_index--; } static void ifindex_free(u_short idx) { IFNET_WLOCK(); ifindex_free_locked(idx); IFNET_WUNLOCK(); } static void ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = ifp; } static void ifnet_setbyindex(u_short idx, struct ifnet *ifp) { IFNET_WLOCK(); ifnet_setbyindex_locked(idx, ifp); IFNET_WUNLOCK(); } struct ifaddr * ifaddr_byindex(u_short idx) { struct ifnet *ifp; struct ifaddr *ifa = NULL; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); if (ifp != NULL && (ifa = ifp->if_addr) != NULL) ifa_ref(ifa); IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. */ static void vnet_if_init(const void *unused __unused) { TAILQ_INIT(&V_ifnet); TAILQ_INIT(&V_ifg_head); IFNET_WLOCK(); if_grow(); /* create initial table */ IFNET_WUNLOCK(); vnet_if_clone_init(); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); #ifdef VIMAGE static void vnet_if_uninit(const void *unused __unused) { VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " "not empty", __func__, __LINE__, &V_ifnet)); VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " "not empty", __func__, __LINE__, &V_ifg_head)); free((caddr_t)V_ifindex_table, M_IFNET); } VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_uninit, NULL); static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; /* Return all inherited interfaces to their parent vnets. */ TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { if (ifp->if_home_vnet != ifp->if_vnet) if_vmove(ifp, ifp->if_home_vnet); } } VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_if_return, NULL); #endif static void if_grow(void) { int oldlim; u_int n; struct ifnet **e; IFNET_WLOCK_ASSERT(); oldlim = V_if_indexlim; IFNET_WUNLOCK(); n = (oldlim << 1) * sizeof(*e); e = malloc(n, M_IFNET, M_WAITOK | M_ZERO); IFNET_WLOCK(); if (V_if_indexlim != oldlim) { free(e, M_IFNET); return; } if (V_ifindex_table != NULL) { memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2); free((caddr_t)V_ifindex_table, M_IFNET); } V_if_indexlim <<= 1; V_ifindex_table = e; } /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is * registered for the passed type. */ struct ifnet * if_alloc(u_char type) { struct ifnet *ifp; u_short idx; ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO); IFNET_WLOCK(); idx = ifindex_alloc(); ifnet_setbyindex_locked(idx, IFNET_HOLD); IFNET_WUNLOCK(); ifp->if_index = idx; ifp->if_type = type; ifp->if_alloctype = type; +#ifdef VIMAGE + ifp->if_vnet = curvnet; +#endif if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); if (ifp->if_l2com == NULL) { free(ifp, M_IFNET); ifindex_free(idx); return (NULL); } } IF_ADDR_LOCK_INIT(ifp); TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); ifp->if_afdata_initialized = 0; IF_AFDATA_LOCK_INIT(ifp); TAILQ_INIT(&ifp->if_addrhead); TAILQ_INIT(&ifp->if_multiaddrs); TAILQ_INIT(&ifp->if_groups); #ifdef MAC mac_ifnet_init(ifp); #endif ifq_init(&ifp->if_snd, ifp); refcount_init(&ifp->if_refcount, 1); /* Index reference. */ for (int i = 0; i < IFCOUNTERS; i++) ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); ifp->if_get_counter = if_get_counter_default; ifnet_setbyindex(ifp->if_index, ifp); return (ifp); } /* * Do the actual work of freeing a struct ifnet, and layer 2 common * structure. This call is made when the last reference to an * interface is released. */ static void if_free_internal(struct ifnet *ifp) { KASSERT((ifp->if_flags & IFF_DYING), ("if_free_internal: interface not dying")); if (if_com_free[ifp->if_alloctype] != NULL) if_com_free[ifp->if_alloctype](ifp->if_l2com, ifp->if_alloctype); #ifdef MAC mac_ifnet_destroy(ifp); #endif /* MAC */ if (ifp->if_description != NULL) free(ifp->if_description, M_IFDESCR); IF_AFDATA_DESTROY(ifp); IF_ADDR_LOCK_DESTROY(ifp); ifq_delete(&ifp->if_snd); for (int i = 0; i < IFCOUNTERS; i++) counter_u64_free(ifp->if_counters[i]); free(ifp, M_IFNET); } /* * Deregister an interface and free the associated storage. */ void if_free(struct ifnet *ifp) { ifp->if_flags |= IFF_DYING; /* XXX: Locking */ CURVNET_SET_QUIET(ifp->if_vnet); IFNET_WLOCK(); KASSERT(ifp == ifnet_byindex_locked(ifp->if_index), ("%s: freeing unallocated ifnet", ifp->if_xname)); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); if (refcount_release(&ifp->if_refcount)) if_free_internal(ifp); CURVNET_RESTORE(); } /* * Interfaces to keep an ifnet type-stable despite the possibility of the * driver calling if_free(). If there are additional references, we defer * freeing the underlying data structure. */ void if_ref(struct ifnet *ifp) { /* We don't assert the ifnet list lock here, but arguably should. */ refcount_acquire(&ifp->if_refcount); } void if_rele(struct ifnet *ifp) { if (!refcount_release(&ifp->if_refcount)) return; if_free_internal(ifp); } void ifq_init(struct ifaltq *ifq, struct ifnet *ifp) { mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); if (ifq->ifq_maxlen == 0) ifq->ifq_maxlen = ifqmaxlen; ifq->altq_type = 0; ifq->altq_disc = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; ifq->altq_tbr = NULL; ifq->altq_ifp = ifp; } void ifq_delete(struct ifaltq *ifq) { mtx_destroy(&ifq->ifq_mtx); } /* * Perform generic interface initialization tasks and attach the interface * to the list of "active" interfaces. If vmove flag is set on entry * to if_attach_internal(), perform only a limited subset of initialization * tasks, given that we are moving from one vnet to another an ifnet which * has already been fully initialized. * * Note that if_detach_internal() removes group membership unconditionally * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. * Thus, when if_vmove() is applied to a cloned interface, group membership * is lost while a cloned one always joins a group whose name is * ifc->ifc_name. To recover this after if_detach_internal() and * if_attach_internal(), the cloner should be specified to * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal() * attempts to join a group whose name is ifc->ifc_name. * * XXX: * - The decision to return void and thus require this function to * succeed is questionable. * - We should probably do more sanity checking. For instance we don't * do anything to insure if_xname is unique or non-empty. */ void if_attach(struct ifnet *ifp) { if_attach_internal(ifp, 0, NULL); } /* * Compute the least common TSO limit. */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) { /* * 1) If there is no limit currently, take the limit from * the network adapter. * * 2) If the network adapter has a limit below the current * limit, apply it. */ if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && ifp->if_hw_tsomax < pmax->tsomaxbytes)) { pmax->tsomaxbytes = ifp->if_hw_tsomax; } if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; } if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } /* * Update TSO limit of a network adapter. * * Returns zero if no change. Else non-zero. */ int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) { int retval = 0; if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { ifp->if_hw_tsomax = pmax->tsomaxbytes; retval++; } if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) { ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize; retval++; } if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) { ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount; retval++; } return (retval); } static void if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc) { unsigned socksize, ifasize; int namelen, masklen; struct sockaddr_dl *sdl; struct ifaddr *ifa; if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index)) panic ("%s: BUG: if_attach called without if_alloc'd input()\n", ifp->if_xname); #ifdef VIMAGE ifp->if_vnet = curvnet; if (ifp->if_home_vnet == NULL) ifp->if_home_vnet = curvnet; #endif if_addgroup(ifp, IFG_ALL); /* Restore group membership for cloned interfaces. */ if (vmove && ifc != NULL) if_clone_addgroup(ifp, ifc); getmicrotime(&ifp->if_lastchange); ifp->if_epoch = time_uptime; KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) || (ifp->if_transmit != NULL && ifp->if_qflush != NULL), ("transmit and qflush must both either be set or both be NULL")); if (ifp->if_transmit == NULL) { ifp->if_transmit = if_transmit; ifp->if_qflush = if_qflush; } if (ifp->if_input == NULL) ifp->if_input = if_input_default; if (ifp->if_requestencap == NULL) ifp->if_requestencap = if_requestencap_default; if (!vmove) { #ifdef MAC mac_ifnet_create(ifp); #endif /* * Create a Link Level name for this device. */ namelen = strlen(ifp->if_xname); /* * Always save enough space for any possiable name so we * can do a rename in place later. */ masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ; socksize = masklen + ifp->if_addrlen; if (socksize < sizeof(*sdl)) socksize = sizeof(*sdl); socksize = roundup2(socksize, sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; ifa = ifa_alloc(ifasize, M_WAITOK); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(ifp->if_xname, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; ifp->if_addr = ifa; ifa->ifa_ifp = ifp; ifa->ifa_rtrequest = link_rtrequest; ifa->ifa_addr = (struct sockaddr *)sdl; sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); ifa->ifa_netmask = (struct sockaddr *)sdl; sdl->sdl_len = masklen; while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp->if_broadcastaddr = NULL; #if defined(INET) || defined(INET6) /* Use defaults for TSO, if nothing is set */ if (ifp->if_hw_tsomax == 0 && ifp->if_hw_tsomaxsegcount == 0 && ifp->if_hw_tsomaxsegsize == 0) { /* * The TSO defaults needs to be such that an * NFS mbuf list of 35 mbufs totalling just * below 64K works and that a chain of mbufs * can be defragged into at most 32 segments: */ ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); ifp->if_hw_tsomaxsegcount = 35; ifp->if_hw_tsomaxsegsize = 2048; /* 2K */ /* XXX some drivers set IFCAP_TSO after ethernet attach */ if (ifp->if_capabilities & IFCAP_TSO) { if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } } #endif } #ifdef VIMAGE else { /* * Update the interface index in the link layer address * of the interface. */ for (ifa = ifp->if_addr; ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) { if (ifa->ifa_addr->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_index = ifp->if_index; } } } #endif IFNET_WLOCK(); TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); #ifdef VIMAGE curvnet->vnet_ifcnt++; #endif IFNET_WUNLOCK(); if (domain_init_status >= 2) if_attachdomain1(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); /* Announce the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); } static void if_attachdomain(void *dummy) { struct ifnet *ifp; TAILQ_FOREACH(ifp, &V_ifnet, if_link) if_attachdomain1(ifp); } SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND, if_attachdomain, NULL); static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; /* * Since dp->dom_ifattach calls malloc() with M_WAITOK, we * cannot lock ifp->if_afdata initialization, entirely. */ IF_AFDATA_LOCK(ifp); if (ifp->if_afdata_initialized >= domain_init_status) { IF_AFDATA_UNLOCK(ifp); log(LOG_WARNING, "%s called more than once on %s\n", __func__, ifp->if_xname); return; } ifp->if_afdata_initialized = domain_init_status; IF_AFDATA_UNLOCK(ifp); /* address family dependent data region */ bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_ifattach) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } } /* * Remove any unicast or broadcast network addresses from an interface. */ void if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa, *next; /* XXX cannot hold IF_ADDR_WLOCK over called functions. */ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family == AF_LINK) continue; #ifdef INET /* XXX: Ugly!! ad hoc just for INET */ if (ifa->ifa_addr->sa_family == AF_INET) { struct ifaliasreq ifr; bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL) == 0) continue; } #endif /* INET */ #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) { in6_purgeaddr(ifa); /* ifp_addrhead is already updated */ continue; } #endif /* INET6 */ IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } } /* * Remove any multicast network addresses from an interface when an ifnet * is going away. */ static void if_purgemaddrs(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 1); IF_ADDR_WUNLOCK(ifp); } /* * Detach an interface, removing it from the list of "active" interfaces. * If vmove flag is set on entry to if_detach_internal(), perform only a * limited subset of cleanup tasks, given that we are moving an ifnet from * one vnet to another, where it must be fully operational. * * XXXRW: There are some significant questions about event ordering, and * how to prevent things from starting to use the interface during detach. */ void if_detach(struct ifnet *ifp) { CURVNET_SET_QUIET(ifp->if_vnet); if_detach_internal(ifp, 0, NULL); CURVNET_RESTORE(); } /* * The vmove flag, if set, indicates that we are called from a callpath * that is moving an interface to a different vnet instance. * * The shutdown flag, if set, indicates that we are called in the * process of shutting down a vnet instance. Currently only the * vnet_if_return SYSUNINIT function sets it. Note: we can be called * on a vnet instance shutdown without this flag being set, e.g., when * the cloned interfaces are destoyed as first thing of teardown. */ static int if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) { struct ifaddr *ifa; int i; struct domain *dp; struct ifnet *iter; int found = 0; #ifdef VIMAGE int shutdown; shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; #endif IFNET_WLOCK(); TAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { TAILQ_REMOVE(&V_ifnet, ifp, if_link); found = 1; break; } IFNET_WUNLOCK(); if (!found) { /* * While we would want to panic here, we cannot * guarantee that the interface is indeed still on * the list given we don't hold locks all the way. */ return (ENOENT); #if 0 if (vmove) panic("%s: ifp=%p not on the ifnet tailq %p", __func__, ifp, &V_ifnet); else return; /* XXX this should panic as well? */ #endif } /* * At this point we know the interface still was on the ifnet list * and we removed it so we are in a stable state. */ #ifdef VIMAGE curvnet->vnet_ifcnt--; #endif /* * In any case (destroy or vmove) detach us from the groups * and remove/wait for pending events on the taskq. * XXX-BZ in theory an interface could still enqueue a taskq change? */ if_delgroups(ifp); taskqueue_drain(taskqueue_swi, &ifp->if_linktask); /* * Check if this is a cloned interface or not. Must do even if * shutting down as a if_vmove_reclaim() would move the ifp and * the if_clone_addgroup() will have a corrupted string overwise * from a gibberish pointer. */ if (vmove && ifcp != NULL) *ifcp = if_clone_findifc(ifp); if_down(ifp); #ifdef VIMAGE /* * On VNET shutdown abort here as the stack teardown will do all * the work top-down for us. */ if (shutdown) { /* * In case of a vmove we are done here without error. * If we would signal an error it would lead to the same * abort as if we did not find the ifnet anymore. * if_detach() calls us in void context and does not care * about an early abort notification, so life is splendid :) */ goto finish_vnet_shutdown; } #endif /* * At this point we are not tearing down a VNET and are either * going to destroy or vmove the interface and have to cleanup * accordingly. */ /* * Remove routes and flush queues. */ #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); if (ALTQ_IS_ATTACHED(&ifp->if_snd)) altq_detach(&ifp->if_snd); #endif if_purgeaddrs(ifp); #ifdef INET in_ifdetach(ifp); #endif #ifdef INET6 /* * Remove all IPv6 kernel structs related to ifp. This should be done * before removing routing entries below, since IPv6 interface direct * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and bark it. */ in6_ifdetach(ifp); #endif if_purgemaddrs(ifp); /* Announce that the interface is gone. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); if (!vmove) { /* * Prevent further calls into the device driver via ifnet. */ if_dead(ifp); /* * Remove link ifaddr pointer and maybe decrement if_index. * Clean up all addresses. */ ifp->if_addr = NULL; /* We can now free link ifaddr. */ IF_ADDR_WLOCK(ifp); if (!TAILQ_EMPTY(&ifp->if_addrhead)) { ifa = TAILQ_FIRST(&ifp->if_addrhead); TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } else IF_ADDR_WUNLOCK(ifp); } rt_flushifroutes(ifp); #ifdef VIMAGE finish_vnet_shutdown: #endif /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus open up the * theoretical race with re-attaching. */ IF_AFDATA_LOCK(ifp); i = ifp->if_afdata_initialized; ifp->if_afdata_initialized = 0; IF_AFDATA_UNLOCK(ifp); for (dp = domains; i > 0 && dp; dp = dp->dom_next) { if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); ifp->if_afdata[dp->dom_family] = NULL; } } return (0); } #ifdef VIMAGE /* * if_vmove() performs a limited version of if_detach() in current * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg. * An attempt is made to shrink if_index in current vnet, find an * unused if_index in target vnet and calls if_grow() if necessary, * and finally find an unused if_xname for the target vnet. */ static void if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { struct if_clone *ifc; u_int bif_dlt, bif_hdrlen; int rc; /* * if_detach_internal() will call the eventhandler to notify * interface departure. That will detach if_bpf. We need to * safe the dlt and hdrlen so we can re-attach it later. */ bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen); /* * Detach from current vnet, but preserve LLADDR info, do not * mark as dead etc. so that the ifnet can be reattached later. * If we cannot find it, we lost the race to someone else. */ rc = if_detach_internal(ifp, 1, &ifc); if (rc != 0) return; /* * Unlink the ifnet from ifindex_table[] in current vnet, and shrink * the if_index for that vnet if possible. * * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized, * or we'd lock on one vnet and unlock on another. */ IFNET_WLOCK(); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); /* * Perform interface-specific reassignment tasks, if provided by * the driver. */ if (ifp->if_reassign != NULL) ifp->if_reassign(ifp, new_vnet, NULL); /* * Switch to the context of the target vnet. */ CURVNET_SET_QUIET(new_vnet); IFNET_WLOCK(); ifp->if_index = ifindex_alloc(); ifnet_setbyindex_locked(ifp->if_index, ifp); IFNET_WUNLOCK(); if_attach_internal(ifp, 1, ifc); if (ifp->if_bpf == NULL) bpfattach(ifp, bif_dlt, bif_hdrlen); CURVNET_RESTORE(); } /* * Move an ifnet to or from another child prison/vnet, specified by the jail id. */ static int if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) { struct prison *pr; struct ifnet *difp; int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Do not try to move the iface from and to the same prison. */ if (pr->pr_vnet == ifp->if_vnet) { prison_free(pr); return (EEXIST); } /* Make sure the named iface does not exists in the dst. prison/vnet. */ /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); if (difp != NULL) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } CURVNET_RESTORE(); /* Move the interface into the child jail/vnet. */ if_vmove(ifp, pr->pr_vnet); /* Report the new if_xname back to the userland. */ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } static int if_vmove_reclaim(struct thread *td, char *ifname, int jid) { struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Make sure the named iface exists in the source prison/vnet. */ CURVNET_SET(pr->pr_vnet); ifp = ifunit(ifname); /* XXX Lock to avoid races. */ if (ifp == NULL) { CURVNET_RESTORE(); prison_free(pr); return (ENXIO); } /* Do not try to move the iface from and to the same prison. */ vnet_dst = TD_TO_VNET(td); if (vnet_dst == ifp->if_vnet) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } /* Get interface back from child jail/vnet. */ if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); /* Report the new if_xname back to the userland. */ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } #endif /* VIMAGE */ /* * Add a group to an interface */ int if_addgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') return (EINVAL); IFNET_WLOCK(); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { IFNET_WUNLOCK(); return (EEXIST); } if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP, M_NOWAIT)) == NULL) { IFNET_WUNLOCK(); return (ENOMEM); } if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, groupname)) break; if (ifg == NULL) { if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); free(ifgm, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; TAILQ_INIT(&ifg->ifg_members); TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); new = 1; } ifg->ifg_refcnt++; ifgl->ifgl_group = ifg; ifgm->ifgm_ifp = ifp; IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); IFNET_WUNLOCK(); if (new) EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove a group from an interface */ int if_delgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_member *ifgm; IFNET_WLOCK(); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) break; if (ifgl == NULL) { IFNET_WUNLOCK(); return (ENOENT); } IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) { TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); free(ifgm, M_TEMP); } if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } else IFNET_WUNLOCK(); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove an interface from all groups */ static void if_delgroups(struct ifnet *ifp) { struct ifg_list *ifgl; struct ifg_member *ifgm; char groupname[IFNAMSIZ]; IFNET_WLOCK(); while (!TAILQ_EMPTY(&ifp->if_groups)) { ifgl = TAILQ_FIRST(&ifp->if_groups); strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ); IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) { TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); free(ifgm, M_TEMP); } if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } else IFNET_WUNLOCK(); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); IFNET_WLOCK(); } IFNET_WUNLOCK(); } /* * Stores all groups from an interface in memory pointed * to by data */ static int if_getgroup(struct ifgroupreq *data, struct ifnet *ifp) { int len, error; struct ifg_list *ifgl; struct ifg_req ifgrq, *ifgp; struct ifgroupreq *ifgr = data; if (ifgr->ifgr_len == 0) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) ifgr->ifgr_len += sizeof(struct ifg_req); IF_ADDR_RUNLOCK(ifp); return (0); } len = ifgr->ifgr_len; ifgp = ifgr->ifgr_groups; /* XXX: wire */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { if (len < sizeof(ifgrq)) { IF_ADDR_RUNLOCK(ifp); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group, sizeof(ifgrq.ifgrq_group)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IF_ADDR_RUNLOCK(ifp); return (error); } len -= sizeof(ifgrq); ifgp++; } IF_ADDR_RUNLOCK(ifp); return (0); } /* * Stores all members of a group in memory pointed to by data */ static int if_getgroupmembers(struct ifgroupreq *data) { struct ifgroupreq *ifgr = data; struct ifg_group *ifg; struct ifg_member *ifgm; struct ifg_req ifgrq, *ifgp; int len, error; IFNET_RLOCK(); TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, ifgr->ifgr_name)) break; if (ifg == NULL) { IFNET_RUNLOCK(); return (ENOENT); } if (ifgr->ifgr_len == 0) { TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) ifgr->ifgr_len += sizeof(ifgrq); IFNET_RUNLOCK(); return (0); } len = ifgr->ifgr_len; ifgp = ifgr->ifgr_groups; TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { if (len < sizeof(ifgrq)) { IFNET_RUNLOCK(); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname, sizeof(ifgrq.ifgrq_member)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IFNET_RUNLOCK(); return (error); } len -= sizeof(ifgrq); ifgp++; } IFNET_RUNLOCK(); return (0); } /* * Return counter values from counter(9)s stored in ifnet. */ uint64_t if_get_counter_default(struct ifnet *ifp, ift_counter cnt) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); return (counter_u64_fetch(ifp->if_counters[cnt])); } /* * Increase an ifnet counter. Usually used for counters shared * between the stack and a driver, but function supports them all. */ void if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); counter_u64_add(ifp->if_counters[cnt], inc); } /* * Copy data from ifnet to userland API structure if_data. */ void if_data_copy(struct ifnet *ifp, struct if_data *ifd) { ifd->ifi_type = ifp->if_type; ifd->ifi_physical = 0; ifd->ifi_addrlen = ifp->if_addrlen; ifd->ifi_hdrlen = ifp->if_hdrlen; ifd->ifi_link_state = ifp->if_link_state; ifd->ifi_vhid = 0; ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_mtu = ifp->if_mtu; ifd->ifi_metric = ifp->if_metric; ifd->ifi_baudrate = ifp->if_baudrate; ifd->ifi_hwassist = ifp->if_hwassist; ifd->ifi_epoch = ifp->if_epoch; ifd->ifi_lastchange = ifp->if_lastchange; ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS); ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS); ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); } /* * Wrapper functions for struct ifnet address list locking macros. These are * used by kernel modules to avoid encoding programming interface or binary * interface assumptions that may be violated when kernel-internal locking * approaches change. */ void if_addr_rlock(struct ifnet *ifp) { IF_ADDR_RLOCK(ifp); } void if_addr_runlock(struct ifnet *ifp) { IF_ADDR_RUNLOCK(ifp); } void if_maddr_rlock(if_t ifp) { IF_ADDR_RLOCK((struct ifnet *)ifp); } void if_maddr_runlock(if_t ifp) { IF_ADDR_RUNLOCK((struct ifnet *)ifp); } /* * Initialization, destruction and refcounting functions for ifaddrs. */ struct ifaddr * ifa_alloc(size_t size, int flags) { struct ifaddr *ifa; KASSERT(size >= sizeof(struct ifaddr), ("%s: invalid size %zu", __func__, size)); ifa = malloc(size, M_IFADDR, M_ZERO | flags); if (ifa == NULL) return (NULL); if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) goto fail; refcount_init(&ifa->ifa_refcnt, 1); return (ifa); fail: /* free(NULL) is okay */ counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); return (NULL); } void ifa_ref(struct ifaddr *ifa) { refcount_acquire(&ifa->ifa_refcnt); } void ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) { counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } } static int ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa, struct sockaddr *ia) { int error; struct rt_addrinfo info; struct sockaddr_dl null_sdl; struct ifnet *ifp; ifp = ifa->ifa_ifp; bzero(&info, sizeof(info)); if (cmd != RTM_DELETE) info.rti_ifp = V_loif; info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC; info.rti_info[RTAX_DST] = ia; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type); error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib); if (error != 0) log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n", __func__, otype, if_name(ifp), error); return (error); } int ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia)); } int ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia)); } int ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia)); } /* * XXX: Because sockaddr_dl has deeper structure than the sockaddr * structs used to represent other address families, it is necessary * to perform a different comparison. */ #define sa_dl_equal(a1, a2) \ ((((const struct sockaddr_dl *)(a1))->sdl_len == \ ((const struct sockaddr_dl *)(a2))->sdl_len) && \ (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \ CLLADDR((const struct sockaddr_dl *)(a2)), \ ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)) /* * Locate an interface based on a complete address. */ /*ARGSUSED*/ static struct ifaddr * ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (sa_equal(addr, ifa->ifa_addr)) { if (getref) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { if (getref) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } struct ifaddr * ifa_ifwithaddr(const struct sockaddr *addr) { return (ifa_ifwithaddr_internal(addr, 1)); } int ifa_ifwithaddr_check(const struct sockaddr *addr) { return (ifa_ifwithaddr_internal(addr, 0) != NULL); } /* * Locate an interface based on the broadcast address. */ /* ARGSUSED */ struct ifaddr * ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Locate the point to point interface with a given destination address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Find an interface on a specific network. If many, choice * is most specific found. */ struct ifaddr * ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr; if (sdl->sdl_index && sdl->sdl_index <= V_if_index) return (ifaddr_byindex(sdl->sdl_index)); } /* * Scan though each interface, looking for ones that have addresses * in this address family and the requested fib. Maintain a reference * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that * kept it stable when we move onto the next interface. */ IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { const char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) { /* * This is a bit broken as it doesn't * take into account that the remote end may * be a single node in the network we are * looking for. * The trouble is that we don't know the * netmask for the remote end. */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } else { /* * Scan all the bits in the ifa's address. * If a bit dissagrees with what we are * looking for, mask it with the netmask * to see if it really matters. * (A byte at a time) */ if (ifa->ifa_netmask == 0) continue; cp = addr_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; while (cp3 < cplim) if ((*cp++ ^ *cp2++) & *cp3++) goto next; /* next address! */ /* * If the netmask of what we just found * is more specific than what we had before * (if we had one), or if the virtual status * of new prefix is better than of the old one, * then remember the new one before continuing * to search for an even better one. */ if (ifa_maybe == NULL || ifa_preferred(ifa_maybe, ifa) || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) { if (ifa_maybe != NULL) ifa_free(ifa_maybe); ifa_maybe = ifa; ifa_ref(ifa_maybe); } } } IF_ADDR_RUNLOCK(ifp); } ifa = ifa_maybe; ifa_maybe = NULL; done: IFNET_RUNLOCK_NOSLEEP(); if (ifa_maybe != NULL) ifa_free(ifa_maybe); return (ifa); } /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; const char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; if (af >= AF_MAX) return (NULL); IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == NULL) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (sa_equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (sa_equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: if (ifa != NULL) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); return (ifa); } /* * See whether new ifa is better than current one: * 1) A non-virtual one is preferred over virtual. * 2) A virtual in master state preferred over any other state. * * Used in several address selecting functions. */ int ifa_preferred(struct ifaddr *cur, struct ifaddr *next) { return (cur->ifa_carp && (!next->ifa_carp || ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); } #include /* * Default action when installing a route with a Link Level gateway. * Lookup an appropriate real ifa to point to. * This should be moved to /sys/net/link.c eventually. */ static void link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { struct ifaddr *ifa, *oifa; struct sockaddr *dst; struct ifnet *ifp; if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) || ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL)) return; ifa = ifaof_ifpforaddr(dst, ifp); if (ifa) { oifa = rt->rt_ifa; rt->rt_ifa = ifa; ifa_free(oifa); if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) ifa->ifa_rtrequest(cmd, rt, info); } } struct sockaddr_dl * link_alloc_sdl(size_t size, int flags) { return (malloc(size, M_TEMP, flags)); } void link_free_sdl(struct sockaddr *sa) { free(sa, M_TEMP); } /* * Fills in given sdl with interface basic info. * Returns pointer to filled sdl. */ struct sockaddr_dl * link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)paddr; memset(sdl, 0, sizeof(struct sockaddr_dl)); sdl->sdl_len = sizeof(struct sockaddr_dl); sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = iftype; return (sdl); } /* * Mark an interface down and notify protocols of * the transition. */ static void if_unroute(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP")); ifp->if_flags &= ~flag; getmicrotime(&ifp->if_lastchange); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFDOWN, ifa->ifa_addr); ifp->if_qflush(ifp); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); } /* * Mark an interface up and notify protocols of * the transition. */ static void if_route(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP")); ifp->if_flags |= flag; getmicrotime(&ifp->if_lastchange); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFUP, ifa->ifa_addr); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); #ifdef INET6 in6_if_up(ifp); #endif } void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); int (*vlan_tag_p)(struct ifnet *, uint16_t *); int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); /* * Handle a change in the interface link state. To avoid LORs * between driver lock and upper layer locks, as well as possible * recursions, we post event to taskqueue, and all job * is done in static do_link_state_change(). */ void if_link_state_change(struct ifnet *ifp, int link_state) { /* Return if state hasn't changed. */ if (ifp->if_link_state == link_state) return; ifp->if_link_state = link_state; taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask); } static void do_link_state_change(void *arg, int pending) { struct ifnet *ifp = (struct ifnet *)arg; int link_state = ifp->if_link_state; CURVNET_SET(ifp->if_vnet); /* Notify that the link state has changed. */ rt_ifmsg(ifp); if (ifp->if_vlantrunk != NULL) (*vlan_link_state_p)(ifp); if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && ifp->if_l2com != NULL) (*ng_ether_link_state_p)(ifp, link_state); if (ifp->if_carp) (*carp_linkstate_p)(ifp); if (ifp->if_bridge) (*bridge_linkstate_p)(ifp); if (ifp->if_lagg) (*lagg_linkstate_p)(ifp, link_state); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); if (pending > 1) if_printf(ifp, "%d link states coalesced\n", pending); if (log_link_state_change) log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname, (link_state == LINK_STATE_UP) ? "UP" : "DOWN" ); EVENTHANDLER_INVOKE(ifnet_link_event, ifp, ifp->if_link_state); CURVNET_RESTORE(); } /* * Mark an interface down and notify protocols of * the transition. */ void if_down(struct ifnet *ifp) { if_unroute(ifp, IFF_UP, AF_UNSPEC); } /* * Mark an interface up and notify protocols of * the transition. */ void if_up(struct ifnet *ifp) { if_route(ifp, IFF_UP, AF_UNSPEC); } /* * Flush an interface queue. */ void if_qflush(struct ifnet *ifp) { struct mbuf *m, *n; struct ifaltq *ifq; ifq = &ifp->if_snd; IFQ_LOCK(ifq); #ifdef ALTQ if (ALTQ_IS_ENABLED(ifq)) ALTQ_PURGE(ifq); #endif n = ifq->ifq_head; while ((m = n) != NULL) { n = m->m_nextpkt; m_freem(m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; IFQ_UNLOCK(ifq); } /* * Map interface name to interface structure pointer, with or without * returning a reference. */ struct ifnet * ifunit_ref(const char *name) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 && !(ifp->if_flags & IFF_DYING)) break; } if (ifp != NULL) if_ref(ifp); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } struct ifnet * ifunit(const char *name) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0) break; } IFNET_RUNLOCK_NOSLEEP(); return (ifp); } /* * Hardware specific interface ioctls. */ static int ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) { struct ifreq *ifr; int error = 0; int new_flags, temp_flags; size_t namelen, onamelen; size_t descrlen; char *descrbuf, *odescrbuf; char new_name[IFNAMSIZ]; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifr = (struct ifreq *)data; switch (cmd) { case SIOCGIFINDEX: ifr->ifr_index = ifp->if_index; break; case SIOCGIFFLAGS: temp_flags = ifp->if_flags | ifp->if_drv_flags; ifr->ifr_flags = temp_flags & 0xffff; ifr->ifr_flagshigh = temp_flags >> 16; break; case SIOCGIFCAP: ifr->ifr_reqcap = ifp->if_capabilities; ifr->ifr_curcap = ifp->if_capenable; break; #ifdef MAC case SIOCGIFMAC: error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp); break; #endif case SIOCGIFMETRIC: ifr->ifr_metric = ifp->if_metric; break; case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; case SIOCGIFPHYS: /* XXXGL: did this ever worked? */ ifr->ifr_phys = 0; break; case SIOCGIFDESCR: error = 0; sx_slock(&ifdescr_sx); if (ifp->if_description == NULL) error = ENOMSG; else { /* space for terminating nul */ descrlen = strlen(ifp->if_description) + 1; if (ifr->ifr_buffer.length < descrlen) ifr->ifr_buffer.buffer = NULL; else error = copyout(ifp->if_description, ifr->ifr_buffer.buffer, descrlen); ifr->ifr_buffer.length = descrlen; } sx_sunlock(&ifdescr_sx); break; case SIOCSIFDESCR: error = priv_check(td, PRIV_NET_SETIFDESCR); if (error) return (error); /* * Copy only (length-1) bytes to make sure that * if_description is always nul terminated. The * length parameter is supposed to count the * terminating nul in. */ if (ifr->ifr_buffer.length > ifdescr_maxlen) return (ENAMETOOLONG); else if (ifr->ifr_buffer.length == 0) descrbuf = NULL; else { descrbuf = malloc(ifr->ifr_buffer.length, M_IFDESCR, M_WAITOK | M_ZERO); error = copyin(ifr->ifr_buffer.buffer, descrbuf, ifr->ifr_buffer.length - 1); if (error) { free(descrbuf, M_IFDESCR); break; } } sx_xlock(&ifdescr_sx); odescrbuf = ifp->if_description; ifp->if_description = descrbuf; sx_xunlock(&ifdescr_sx); getmicrotime(&ifp->if_lastchange); free(odescrbuf, M_IFDESCR); break; case SIOCGIFFIB: ifr->ifr_fib = ifp->if_fib; break; case SIOCSIFFIB: error = priv_check(td, PRIV_NET_SETIFFIB); if (error) return (error); if (ifr->ifr_fib >= rt_numfibs) return (EINVAL); ifp->if_fib = ifr->ifr_fib; break; case SIOCSIFFLAGS: error = priv_check(td, PRIV_NET_SETIFFLAGS); if (error) return (error); /* * Currently, no driver owned flags pass the IFF_CANTCHANGE * check, so we don't need special handling here yet. */ new_flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); if (ifp->if_flags & IFF_UP && (new_flags & IFF_UP) == 0) { if_down(ifp); } else if (new_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { if_up(ifp); } /* See if permanently promiscuous mode bit is about to flip */ if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { if (new_flags & IFF_PPROMISC) ifp->if_flags |= IFF_PROMISC; else if (ifp->if_pcount == 0) ifp->if_flags &= ~IFF_PROMISC; if (log_promisc_mode_change) log(LOG_INFO, "%s: permanently promiscuous mode %s\n", ifp->if_xname, ((new_flags & IFF_PPROMISC) ? "enabled" : "disabled")); } ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | (new_flags &~ IFF_CANTCHANGE); if (ifp->if_ioctl) { (void) (*ifp->if_ioctl)(ifp, cmd, data); } getmicrotime(&ifp->if_lastchange); break; case SIOCSIFCAP: error = priv_check(td, PRIV_NET_SETIFCAP); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); if (ifr->ifr_reqcap & ~ifp->if_capabilities) return (EINVAL); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; #ifdef MAC case SIOCSIFMAC: error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp); break; #endif case SIOCSIFNAME: error = priv_check(td, PRIV_NET_SETIFNAME); if (error) return (error); error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); if (error != 0) return (error); if (new_name[0] == '\0') return (EINVAL); if (new_name[IFNAMSIZ-1] != '\0') { new_name[IFNAMSIZ-1] = '\0'; if (strlen(new_name) == IFNAMSIZ-1) return (EINVAL); } if (ifunit(new_name) != NULL) return (EEXIST); /* * XXX: Locking. Nothing else seems to lock if_flags, * and there are numerous other races with the * ifunit() checks not being atomic with namespace * changes (renames, vmoves, if_attach, etc). */ ifp->if_flags |= IFF_RENAMING; /* Announce the departure of the interface. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); log(LOG_INFO, "%s: changing name to '%s'\n", ifp->if_xname, new_name); IF_ADDR_WLOCK(ifp); strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; namelen = strlen(new_name); onamelen = sdl->sdl_nlen; /* * Move the address if needed. This is safe because we * allocate space for a name of length IFNAMSIZ when we * create this in if_attach(). */ if (namelen != onamelen) { bcopy(sdl->sdl_data + onamelen, sdl->sdl_data + namelen, sdl->sdl_alen); } bcopy(new_name, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl = (struct sockaddr_dl *)ifa->ifa_netmask; bzero(sdl->sdl_data, onamelen); while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; IF_ADDR_WUNLOCK(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); /* Announce the return of the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); ifp->if_flags &= ~IFF_RENAMING; break; #ifdef VIMAGE case SIOCSIFVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error) return (error); error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid); break; #endif case SIOCSIFMETRIC: error = priv_check(td, PRIV_NET_SETIFMETRIC); if (error) return (error); ifp->if_metric = ifr->ifr_metric; getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYS: error = priv_check(td, PRIV_NET_SETIFPHYS); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFMTU: { u_long oldmtu = ifp->if_mtu; error = priv_check(td, PRIV_NET_SETIFMTU); if (error) return (error); if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) return (EINVAL); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) { getmicrotime(&ifp->if_lastchange); rt_ifmsg(ifp); } /* * If the link MTU changed, do network layer specific procedure. */ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } break; } case SIOCADDMULTI: case SIOCDELMULTI: if (cmd == SIOCADDMULTI) error = priv_check(td, PRIV_NET_ADDMULTI); else error = priv_check(td, PRIV_NET_DELMULTI); if (error) return (error); /* Don't allow group membership on non-multicast interfaces. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EOPNOTSUPP); /* Don't let users screw up protocols' entries. */ if (ifr->ifr_addr.sa_family != AF_LINK) return (EINVAL); if (cmd == SIOCADDMULTI) { struct ifmultiaddr *ifma; /* * Userland is only permitted to join groups once * via the if_addmulti() KPI, because it cannot hold * struct ifmultiaddr * between calls. It may also * lose a race while we check if the membership * already exists. */ IF_ADDR_RLOCK(ifp); ifma = if_findmulti(ifp, &ifr->ifr_addr); IF_ADDR_RUNLOCK(ifp); if (ifma != NULL) error = EADDRINUSE; else error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); } else { error = if_delmulti(ifp, &ifr->ifr_addr); } if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif case SIOCSIFMEDIA: case SIOCSIFGENERIC: error = priv_check(td, PRIV_NET_HWIOCTL); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCGIFSTATUS: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: case SIOCGIFGENERIC: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); break; case SIOCSIFLLADDR: error = priv_check(td, PRIV_NET_SETLLADDR); if (error) return (error); error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); break; case SIOCAIFGROUP: { struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr; error = priv_check(td, PRIV_NET_ADDIFGROUP); if (error) return (error); if ((error = if_addgroup(ifp, ifgr->ifgr_group))) return (error); break; } case SIOCGIFGROUP: if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp))) return (error); break; case SIOCDIFGROUP: { struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr; error = priv_check(td, PRIV_NET_DELIFGROUP); if (error) return (error); if ((error = if_delgroup(ifp, ifgr->ifgr_group))) return (error); break; } default: error = ENOIOCTL; break; } return (error); } #ifdef COMPAT_FREEBSD32 struct ifconf32 { int32_t ifc_len; union { uint32_t ifcu_buf; uint32_t ifcu_req; } ifc_ifcu; }; #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) #endif /* * Interface ioctls. */ int ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) { struct ifnet *ifp; struct ifreq *ifr; int error; int oif_flags; #ifdef VIMAGE int shutdown; #endif CURVNET_SET(so->so_vnet); #ifdef VIMAGE /* Make sure the VNET is stable. */ shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET && so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; if (shutdown) { CURVNET_RESTORE(); return (EBUSY); } #endif switch (cmd) { case SIOCGIFCONF: error = ifconf(cmd, data); CURVNET_RESTORE(); return (error); #ifdef COMPAT_FREEBSD32 case SIOCGIFCONF32: { struct ifconf32 *ifc32; struct ifconf ifc; ifc32 = (struct ifconf32 *)data; ifc.ifc_len = ifc32->ifc_len; ifc.ifc_buf = PTRIN(ifc32->ifc_buf); error = ifconf(SIOCGIFCONF, (void *)&ifc); CURVNET_RESTORE(); if (error == 0) ifc32->ifc_len = ifc.ifc_len; return (error); } #endif } ifr = (struct ifreq *)data; switch (cmd) { #ifdef VIMAGE case SIOCSIFRVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error == 0) error = if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid); CURVNET_RESTORE(); return (error); #endif case SIOCIFCREATE: case SIOCIFCREATE2: error = priv_check(td, PRIV_NET_IFCREATE); if (error == 0) error = if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL); CURVNET_RESTORE(); return (error); case SIOCIFDESTROY: error = priv_check(td, PRIV_NET_IFDESTROY); if (error == 0) error = if_clone_destroy(ifr->ifr_name); CURVNET_RESTORE(); return (error); case SIOCIFGCLONERS: error = if_clone_list((struct if_clonereq *)data); CURVNET_RESTORE(); return (error); case SIOCGIFGMEMB: error = if_getgroupmembers((struct ifgroupreq *)data); CURVNET_RESTORE(); return (error); #if defined(INET) || defined(INET6) case SIOCSVH: case SIOCGVH: if (carp_ioctl_p == NULL) error = EPROTONOSUPPORT; else error = (*carp_ioctl_p)(ifr, cmd, td); CURVNET_RESTORE(); return (error); #endif } ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) { CURVNET_RESTORE(); return (ENXIO); } error = ifhwioctl(cmd, ifp, data, td); if (error != ENOIOCTL) { if_rele(ifp); CURVNET_RESTORE(); return (error); } oif_flags = ifp->if_flags; if (so->so_proto == NULL) { if_rele(ifp); CURVNET_RESTORE(); return (EOPNOTSUPP); } /* * Pass the request on to the socket control method, and if the * latter returns EOPNOTSUPP, directly to the interface. * * Make an exception for the legacy SIOCSIF* requests. Drivers * trust SIOCSIFADDR et al to come from an already privileged * layer, and do not perform any credentials checks or input * validation. */ error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, ifp, td)); if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR && cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK) error = (*ifp->if_ioctl)(ifp, cmd, data); if ((oif_flags ^ ifp->if_flags) & IFF_UP) { #ifdef INET6 if (ifp->if_flags & IFF_UP) in6_if_up(ifp); #endif } if_rele(ifp); CURVNET_RESTORE(); return (error); } /* * The code common to handling reference counted flags, * e.g., in ifpromisc() and if_allmulti(). * The "pflag" argument can specify a permanent mode flag to check, * such as IFF_PPROMISC for promiscuous mode; should be 0 if none. * * Only to be used on stack-owned flags, not driver-owned flags. */ static int if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch) { struct ifreq ifr; int error; int oldflags, oldcount; /* Sanity checks to catch programming errors */ KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0, ("%s: setting driver-owned flag %d", __func__, flag)); if (onswitch) KASSERT(*refcount >= 0, ("%s: increment negative refcount %d for flag %d", __func__, *refcount, flag)); else KASSERT(*refcount > 0, ("%s: decrement non-positive refcount %d for flag %d", __func__, *refcount, flag)); /* In case this mode is permanent, just touch refcount */ if (ifp->if_flags & pflag) { *refcount += onswitch ? 1 : -1; return (0); } /* Save ifnet parameters for if_ioctl() may fail */ oldcount = *refcount; oldflags = ifp->if_flags; /* * See if we aren't the only and touching refcount is enough. * Actually toggle interface flag if we are the first or last. */ if (onswitch) { if ((*refcount)++) return (0); ifp->if_flags |= flag; } else { if (--(*refcount)) return (0); ifp->if_flags &= ~flag; } /* Call down the driver since we've changed interface flags */ if (ifp->if_ioctl == NULL) { error = EOPNOTSUPP; goto recover; } ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); if (error) goto recover; /* Notify userland that interface flags have changed */ rt_ifmsg(ifp); return (0); recover: /* Recover after driver error */ *refcount = oldcount; ifp->if_flags = oldflags; return (error); } /* * Set/clear promiscuous mode on interface ifp based on the truth value * of pswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. */ int ifpromisc(struct ifnet *ifp, int pswitch) { int error; int oldflags = ifp->if_flags; error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) && log_promisc_mode_change) log(LOG_INFO, "%s: promiscuous mode %s\n", ifp->if_xname, (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); return (error); } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(u_long cmd, caddr_t data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr; struct sbuf *sb; int error, full = 0, valid_len, max_len; /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */ max_len = MAXPHYS - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) return (EINVAL); again: if (ifc->ifc_len <= max_len) { max_len = ifc->ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; IFNET_RLOCK(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { int addrs; /* * Zero the ifr_name buffer to make sure we don't * disclose the contents of the stack. */ memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name)); if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) { sbuf_delete(sb); IFNET_RUNLOCK(); return (ENAMETOOLONG); } addrs = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; if (sa->sa_len <= sizeof(*sa)) { ifr.ifr_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else { sbuf_bcat(sb, &ifr, offsetof(struct ifreq, ifr_addr)); max_len += offsetof(struct ifreq, ifr_addr); sbuf_bcat(sb, sa, sa->sa_len); max_len += sa->sa_len; } if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } IF_ADDR_RUNLOCK(ifp); if (addrs == 0) { bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr)); sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); /* * If we didn't allocate enough space (uncommon), try again. If * we have already allocated as much space as we are allowed, * return what we've got. */ if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc->ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); sbuf_delete(sb); return (error); } /* * Just like ifpromisc(), but for all-multicast-reception mode. */ int if_allmulti(struct ifnet *ifp, int onswitch) { return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); } struct ifmultiaddr * if_findmulti(struct ifnet *ifp, const struct sockaddr *sa) { struct ifmultiaddr *ifma; IF_ADDR_LOCK_ASSERT(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (sa->sa_family == AF_LINK) { if (sa_dl_equal(ifma->ifma_addr, sa)) break; } else { if (sa_equal(ifma->ifma_addr, sa)) break; } } return ifma; } /* * Allocate a new ifmultiaddr and initialize based on passed arguments. We * make copies of passed sockaddrs. The ifmultiaddr will not be added to * the ifnet multicast address list here, so the caller must do that and * other setup work (such as notifying the device driver). The reference * count is initialized to 1. */ static struct ifmultiaddr * if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, int mflags) { struct ifmultiaddr *ifma; struct sockaddr *dupsa; ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | M_ZERO); if (ifma == NULL) return (NULL); dupsa = malloc(sa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma, M_IFMADDR); return (NULL); } bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = NULL; if (llsa == NULL) { ifma->ifma_lladdr = NULL; return (ifma); } dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); return (NULL); } bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_lladdr = dupsa; return (ifma); } /* * if_freemulti: free ifmultiaddr structure and possibly attached related * addresses. The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. */ static void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. * - Allocate address structures for one or both addresses, and attach to the * multicast address list on the interface. If automatically adding a link * layer address, the protocol address will own a reference to the link * layer address, to be freed when it is freed. * - Notify the network device driver of an addition to the multicast address * list. * * 'sa' points to caller-owned memory with the desired multicast address. * * 'retifma' will be used to return a pointer to the resulting multicast * address reference, if desired. */ int if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma) { struct ifmultiaddr *ifma, *ll_ifma; struct sockaddr *llsa; struct sockaddr_dl sdl; int error; /* * If the address is already present, return a new reference to it; * otherwise, allocate storage and set up a new address. */ IF_ADDR_WLOCK(ifp); ifma = if_findmulti(ifp, sa); if (ifma != NULL) { ifma->ifma_refcount++; if (retifma != NULL) *retifma = ifma; IF_ADDR_WUNLOCK(ifp); return (0); } /* * The address isn't already present; resolve the protocol address * into a link layer address, and then look that up, bump its * refcount or allocate an ifma for that also. * Most link layer resolving functions returns address data which * fits inside default sockaddr_dl structure. However callback * can allocate another sockaddr structure, in that case we need to * free it later. */ llsa = NULL; ll_ifma = NULL; if (ifp->if_resolvemulti != NULL) { /* Provide called function with buffer size information */ sdl.sdl_len = sizeof(sdl); llsa = (struct sockaddr *)&sdl; error = ifp->if_resolvemulti(ifp, &llsa, sa); if (error) goto unlock_out; } /* * Allocate the new address. Don't hook it up yet, as we may also * need to allocate a link layer multicast address. */ ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT); if (ifma == NULL) { error = ENOMEM; goto free_llsa_out; } /* * If a link layer address is found, we'll need to see if it's * already present in the address list, or allocate is as well. * When this block finishes, the link layer address will be on the * list. */ if (llsa != NULL) { ll_ifma = if_findmulti(ifp, llsa); if (ll_ifma == NULL) { ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT); if (ll_ifma == NULL) { --ifma->ifma_refcount; if_freemulti(ifma); error = ENOMEM; goto free_llsa_out; } TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma, ifma_link); } else ll_ifma->ifma_refcount++; ifma->ifma_llifma = ll_ifma; } /* * We now have a new multicast address, ifma, and possibly a new or * referenced link layer address. Add the primary address to the * ifnet address list. */ TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); if (retifma != NULL) *retifma = ifma; /* * Must generate the message while holding the lock so that 'ifma' * pointer is still valid. */ rt_newmaddrmsg(RTM_NEWMADDR, ifma); IF_ADDR_WUNLOCK(ifp); /* * We are certain we have added something, so call down to the * interface to let them know about it. */ if (ifp->if_ioctl != NULL) { (void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); } if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); return (0); free_llsa_out: if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); unlock_out: IF_ADDR_WUNLOCK(ifp); return (error); } /* * Delete a multicast group membership by network-layer group address. * * Returns ENOENT if the entry could not be found. If ifp no longer * exists, results are undefined. This entry point should only be used * from subsystems which do appropriate locking to hold ifp for the * duration of the call. * Network-layer protocol domains must use if_delmulti_ifma(). */ int if_delmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; int lastref; #ifdef INVARIANTS struct ifnet *oifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; if (ifp != oifp) ifp = NULL; IFNET_RUNLOCK_NOSLEEP(); KASSERT(ifp != NULL, ("%s: ifnet went away", __func__)); #endif if (ifp == NULL) return (ENOENT); IF_ADDR_WLOCK(ifp); lastref = 0; ifma = if_findmulti(ifp, sa); if (ifma != NULL) lastref = if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); if (ifma == NULL) return (ENOENT); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } return (0); } /* * Delete all multicast group membership for an interface. * Should be used to quickly flush all multicast filters. */ void if_delallmulti(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); } /* * Delete a multicast group membership by group membership pointer. * Network-layer protocol domains must use this routine. * * It is safe to call this routine if the ifp disappeared. */ void if_delmulti_ifma(struct ifmultiaddr *ifma) { struct ifnet *ifp; int lastref; ifp = ifma->ifma_ifp; #ifdef DIAGNOSTIC if (ifp == NULL) { printf("%s: ifma_ifp seems to be detached\n", __func__); } else { struct ifnet *oifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; if (ifp != oifp) { printf("%s: ifnet %p disappeared\n", __func__, ifp); ifp = NULL; } IFNET_RUNLOCK_NOSLEEP(); } #endif /* * If and only if the ifnet instance exists: Acquire the address lock. */ if (ifp != NULL) IF_ADDR_WLOCK(ifp); lastref = if_delmulti_locked(ifp, ifma, 0); if (ifp != NULL) { /* * If and only if the ifnet instance exists: * Release the address lock. * If the group was left: update the hardware hash filter. */ IF_ADDR_WUNLOCK(ifp); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } } } /* * Perform deletion of network-layer and/or link-layer multicast address. * * Return 0 if the reference count was decremented. * Return 1 if the final reference was released, indicating that the * hardware hash filter should be reprogrammed. */ static int if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) { struct ifmultiaddr *ll_ifma; if (ifp != NULL && ifma->ifma_ifp != NULL) { KASSERT(ifma->ifma_ifp == ifp, ("%s: inconsistent ifp %p", __func__, ifp)); IF_ADDR_WLOCK_ASSERT(ifp); } ifp = ifma->ifma_ifp; /* * If the ifnet is detaching, null out references to ifnet, * so that upper protocol layers will notice, and not attempt * to obtain locks for an ifnet which no longer exists. The * routing socket announcement must happen before the ifnet * instance is detached from the system. */ if (detaching) { #ifdef DIAGNOSTIC printf("%s: detaching ifnet instance %p\n", __func__, ifp); #endif /* * ifp may already be nulled out if we are being reentered * to delete the ll_ifma. */ if (ifp != NULL) { rt_newmaddrmsg(RTM_DELMADDR, ifma); ifma->ifma_ifp = NULL; } } if (--ifma->ifma_refcount > 0) return 0; /* * If this ifma is a network-layer ifma, a link-layer ifma may * have been associated with it. Release it first if so. */ ll_ifma = ifma->ifma_llifma; if (ll_ifma != NULL) { KASSERT(ifma->ifma_lladdr != NULL, ("%s: llifma w/o lladdr", __func__)); if (detaching) ll_ifma->ifma_ifp = NULL; /* XXX */ if (--ll_ifma->ifma_refcount == 0) { if (ifp != NULL) { TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifma_link); } if_freemulti(ll_ifma); } } if (ifp != NULL) TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); if_freemulti(ifma); /* * The last reference to this instance of struct ifmultiaddr * was released; the hardware should be notified of this change. */ return 1; } /* * Set the link layer address on an interface. * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. * * Set noinline to be dtrace-friendly */ __noinline int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; if (ifa == NULL) { IF_ADDR_RUNLOCK(ifp); return (EINVAL); } ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) { ifa_free(ifa); return (EINVAL); } if (len != sdl->sdl_alen) { /* don't allow length to change */ ifa_free(ifa); return (EINVAL); } switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_XETHER: case IFT_ISO88025: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_ARCNET: case IFT_IEEE8023ADLAG: case IFT_IEEE80211: bcopy(lladdr, LLADDR(sdl), len); ifa_free(ifa); break; default: ifa_free(ifa); return (ENODEV); } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. */ if ((ifp->if_flags & IFF_UP) != 0) { if (ifp->if_ioctl) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } EVENTHANDLER_INVOKE(iflladdr_event, ifp); return (0); } /* * Compat function for handling basic encapsulation requests. * Not converted stacks (FDDI, IB, ..) supports traditional * output model: ARP (and other similar L2 protocols) are handled * inside output routine, arpresolve/nd6_resolve() returns MAC * address instead of full prepend. * * This function creates calculated header==MAC for IPv4/IPv6 and * returns EAFNOSUPPORT (which is then handled in ARP code) for other * address families. */ static int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) { if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < req->lladdr_len) return (ENOMEM); switch (req->family) { case AF_INET: case AF_INET6: break; default: return (EAFNOSUPPORT); } /* Copy lladdr to storage as is */ memmove(req->buf, req->lladdr, req->lladdr_len); req->bufsize = req->lladdr_len; req->lladdr_off = 0; return (0); } /* * The name argument must be a pointer to storage which will last as * long as the interface does. For physical devices, the result of * device_get_name(dev) is a good choice and for pseudo-devices a * static string works well. */ void if_initname(struct ifnet *ifp, const char *name, int unit) { ifp->if_dname = name; ifp->if_dunit = unit; if (unit != IF_DUNIT_NONE) snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); else strlcpy(ifp->if_xname, name, IFNAMSIZ); } int if_printf(struct ifnet *ifp, const char * fmt, ...) { va_list ap; int retval; retval = printf("%s: ", ifp->if_xname); va_start(ap, fmt); retval += vprintf(fmt, ap); va_end(ap); return (retval); } void if_start(struct ifnet *ifp) { (*(ifp)->if_start)(ifp); } /* * Backwards compatibility interface for drivers * that have not implemented it */ static int if_transmit(struct ifnet *ifp, struct mbuf *m) { int error; IFQ_HANDOFF(ifp, m, error); return (error); } static void if_input_default(struct ifnet *ifp __unused, struct mbuf *m) { m_freem(m); } int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) { int active = 0; IF_LOCK(ifq); if (_IF_QFULL(ifq)) { IF_UNLOCK(ifq); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); return (0); } if (ifp != NULL) { if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); IF_UNLOCK(ifq); if (ifp != NULL && !active) (*(ifp)->if_start)(ifp); return (1); } void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f) { KASSERT(if_com_alloc[type] == NULL, ("if_register_com_alloc: %d already registered", type)); KASSERT(if_com_free[type] == NULL, ("if_register_com_alloc: %d free already registered", type)); if_com_alloc[type] = a; if_com_free[type] = f; } void if_deregister_com_alloc(u_char type) { KASSERT(if_com_alloc[type] != NULL, ("if_deregister_com_alloc: %d not registered", type)); KASSERT(if_com_free[type] != NULL, ("if_deregister_com_alloc: %d free not registered", type)); if_com_alloc[type] = NULL; if_com_free[type] = NULL; } /* API for driver access to network stack owned ifnet.*/ uint64_t if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) { uint64_t oldbrate; oldbrate = ifp->if_baudrate; ifp->if_baudrate = baudrate; return (oldbrate); } uint64_t if_getbaudrate(if_t ifp) { return (((struct ifnet *)ifp)->if_baudrate); } int if_setcapabilities(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capabilities = capabilities; return (0); } int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) { ((struct ifnet *)ifp)->if_capabilities |= setbit; ((struct ifnet *)ifp)->if_capabilities &= ~clearbit; return (0); } int if_getcapabilities(if_t ifp) { return ((struct ifnet *)ifp)->if_capabilities; } int if_setcapenable(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capenable = capabilities; return (0); } int if_setcapenablebit(if_t ifp, int setcap, int clearcap) { if(setcap) ((struct ifnet *)ifp)->if_capenable |= setcap; if(clearcap) ((struct ifnet *)ifp)->if_capenable &= ~clearcap; return (0); } const char * if_getdname(if_t ifp) { return ((struct ifnet *)ifp)->if_dname; } int if_togglecapenable(if_t ifp, int togglecap) { ((struct ifnet *)ifp)->if_capenable ^= togglecap; return (0); } int if_getcapenable(if_t ifp) { return ((struct ifnet *)ifp)->if_capenable; } /* * This is largely undesirable because it ties ifnet to a device, but does * provide flexiblity for an embedded product vendor. Should be used with * the understanding that it violates the interface boundaries, and should be * a last resort only. */ int if_setdev(if_t ifp, void *dev) { return (0); } int if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags) { ((struct ifnet *)ifp)->if_drv_flags |= set_flags; ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags; return (0); } int if_getdrvflags(if_t ifp) { return ((struct ifnet *)ifp)->if_drv_flags; } int if_setdrvflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_drv_flags = flags; return (0); } int if_setflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_flags = flags; return (0); } int if_setflagbits(if_t ifp, int set, int clear) { ((struct ifnet *)ifp)->if_flags |= set; ((struct ifnet *)ifp)->if_flags &= ~clear; return (0); } int if_getflags(if_t ifp) { return ((struct ifnet *)ifp)->if_flags; } int if_clearhwassist(if_t ifp) { ((struct ifnet *)ifp)->if_hwassist = 0; return (0); } int if_sethwassistbits(if_t ifp, int toset, int toclear) { ((struct ifnet *)ifp)->if_hwassist |= toset; ((struct ifnet *)ifp)->if_hwassist &= ~toclear; return (0); } int if_sethwassist(if_t ifp, int hwassist_bit) { ((struct ifnet *)ifp)->if_hwassist = hwassist_bit; return (0); } int if_gethwassist(if_t ifp) { return ((struct ifnet *)ifp)->if_hwassist; } int if_setmtu(if_t ifp, int mtu) { ((struct ifnet *)ifp)->if_mtu = mtu; return (0); } int if_getmtu(if_t ifp) { return ((struct ifnet *)ifp)->if_mtu; } int if_getmtu_family(if_t ifp, int family) { struct domain *dp; for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_family == family && dp->dom_ifmtu != NULL) return (dp->dom_ifmtu((struct ifnet *)ifp)); } return (((struct ifnet *)ifp)->if_mtu); } int if_setsoftc(if_t ifp, void *softc) { ((struct ifnet *)ifp)->if_softc = softc; return (0); } void * if_getsoftc(if_t ifp) { return ((struct ifnet *)ifp)->if_softc; } void if_setrcvif(struct mbuf *m, if_t ifp) { m->m_pkthdr.rcvif = (struct ifnet *)ifp; } void if_setvtag(struct mbuf *m, uint16_t tag) { m->m_pkthdr.ether_vtag = tag; } uint16_t if_getvtag(struct mbuf *m) { return (m->m_pkthdr.ether_vtag); } int if_sendq_empty(if_t ifp) { return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd); } struct ifaddr * if_getifaddr(if_t ifp) { return ((struct ifnet *)ifp)->if_addr; } int if_getamcount(if_t ifp) { return ((struct ifnet *)ifp)->if_amcount; } int if_setsendqready(if_t ifp) { IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd); return (0); } int if_setsendqlen(if_t ifp, int tx_desc_count) { IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count); ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count; return (0); } int if_vlantrunkinuse(if_t ifp) { return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0; } int if_input(if_t ifp, struct mbuf* sendmp) { (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp); return (0); } /* XXX */ #ifndef ETH_ADDR_LEN #define ETH_ADDR_LEN 6 #endif int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max) { struct ifmultiaddr *ifma; uint8_t *lmta = (uint8_t *)mta; int mcnt = 0; TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == max) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } *cnt = mcnt; return (0); } int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max) { int error; if_maddr_rlock(ifp); error = if_setupmultiaddr(ifp, mta, cnt, max); if_maddr_runlock(ifp); return (error); } int if_multiaddr_count(if_t ifp, int max) { struct ifmultiaddr *ifma; int count; count = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; count++; if (count == max) break; } if_maddr_runlock(ifp); return (count); } int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg) { struct ifmultiaddr *ifma; int cnt = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) cnt += filter(arg, ifma, cnt); if_maddr_runlock(ifp); return (cnt); } struct mbuf * if_dequeue(if_t ifp) { struct mbuf *m; IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m); return (m); } int if_sendq_prepend(if_t ifp, struct mbuf *m) { IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m); return (0); } int if_setifheaderlen(if_t ifp, int len) { ((struct ifnet *)ifp)->if_hdrlen = len; return (0); } caddr_t if_getlladdr(if_t ifp) { return (IF_LLADDR((struct ifnet *)ifp)); } void * if_gethandle(u_char type) { return (if_alloc(type)); } void if_bpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; BPF_MTAP(ifp, m); } void if_etherbpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; ETHER_BPF_MTAP(ifp, m); } void if_vlancap(if_t ifh) { struct ifnet *ifp = (struct ifnet *)ifh; VLAN_CAPABILITIES(ifp); } void if_setinitfn(if_t ifp, void (*init_fn)(void *)) { ((struct ifnet *)ifp)->if_init = init_fn; } void if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t)) { ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn; } void if_setstartfn(if_t ifp, void (*start_fn)(if_t)) { ((struct ifnet *)ifp)->if_start = (void *)start_fn; } void if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn) { ((struct ifnet *)ifp)->if_transmit = start_fn; } void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn) { ((struct ifnet *)ifp)->if_qflush = flush_fn; } void if_setgetcounterfn(if_t ifp, if_get_counter_t fn) { ifp->if_get_counter = fn; } /* Revisit these - These are inline functions originally. */ int drbr_inuse_drv(if_t ifh, struct buf_ring *br) { return drbr_inuse(ifh, br); } struct mbuf* drbr_dequeue_drv(if_t ifh, struct buf_ring *br) { return drbr_dequeue(ifh, br); } int drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br) { return drbr_needs_enqueue(ifh, br); } int drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m) { return drbr_enqueue(ifh, br, m); } Index: projects/vnet/sys/sys/event.h =================================================================== --- projects/vnet/sys/sys/event.h (revision 302276) +++ projects/vnet/sys/sys/event.h (revision 302277) @@ -1,297 +1,299 @@ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_EVENT_H_ #define _SYS_EVENT_H_ #include #define EVFILT_READ (-1) #define EVFILT_WRITE (-2) #define EVFILT_AIO (-3) /* attached to aio requests */ #define EVFILT_VNODE (-4) /* attached to vnodes */ #define EVFILT_PROC (-5) /* attached to struct proc */ #define EVFILT_SIGNAL (-6) /* attached to struct proc */ #define EVFILT_TIMER (-7) /* timers */ #define EVFILT_PROCDESC (-8) /* attached to process descriptors */ #define EVFILT_FS (-9) /* filesystem events */ #define EVFILT_LIO (-10) /* attached to lio requests */ #define EVFILT_USER (-11) /* User events */ #define EVFILT_SENDFILE (-12) /* attached to sendfile requests */ #define EVFILT_SYSCOUNT 12 #define EV_SET(kevp_, a, b, c, d, e, f) do { \ struct kevent *kevp = (kevp_); \ (kevp)->ident = (a); \ (kevp)->filter = (b); \ (kevp)->flags = (c); \ (kevp)->fflags = (d); \ (kevp)->data = (e); \ (kevp)->udata = (f); \ } while(0) struct kevent { uintptr_t ident; /* identifier for this event */ short filter; /* filter for event */ u_short flags; u_int fflags; intptr_t data; void *udata; /* opaque user data identifier */ }; /* actions */ #define EV_ADD 0x0001 /* add event to kq (implies enable) */ #define EV_DELETE 0x0002 /* delete event from kq */ #define EV_ENABLE 0x0004 /* enable event */ #define EV_DISABLE 0x0008 /* disable event (not reported) */ #define EV_FORCEONESHOT 0x0100 /* enable _ONESHOT and force trigger */ /* flags */ #define EV_ONESHOT 0x0010 /* only report one occurrence */ #define EV_CLEAR 0x0020 /* clear event state after reporting */ #define EV_RECEIPT 0x0040 /* force EV_ERROR on success, data=0 */ #define EV_DISPATCH 0x0080 /* disable event after reporting */ #define EV_SYSFLAGS 0xF000 /* reserved by system */ #define EV_DROP 0x1000 /* note should be dropped */ #define EV_FLAG1 0x2000 /* filter-specific flag */ #define EV_FLAG2 0x4000 /* filter-specific flag */ /* returned values */ #define EV_EOF 0x8000 /* EOF detected */ #define EV_ERROR 0x4000 /* error, data contains errno */ /* * data/hint flags/masks for EVFILT_USER, shared with userspace * * On input, the top two bits of fflags specifies how the lower twenty four * bits should be applied to the stored value of fflags. * * On output, the top two bits will always be set to NOTE_FFNOP and the * remaining twenty four bits will contain the stored fflags value. */ #define NOTE_FFNOP 0x00000000 /* ignore input fflags */ #define NOTE_FFAND 0x40000000 /* AND fflags */ #define NOTE_FFOR 0x80000000 /* OR fflags */ #define NOTE_FFCOPY 0xc0000000 /* copy fflags */ #define NOTE_FFCTRLMASK 0xc0000000 /* masks for operations */ #define NOTE_FFLAGSMASK 0x00ffffff #define NOTE_TRIGGER 0x01000000 /* Cause the event to be triggered for output. */ /* * data/hint flags for EVFILT_{READ|WRITE}, shared with userspace */ #define NOTE_LOWAT 0x0001 /* low water mark */ #define NOTE_FILE_POLL 0x0002 /* behave like poll() */ /* * data/hint flags for EVFILT_VNODE, shared with userspace */ #define NOTE_DELETE 0x0001 /* vnode was removed */ #define NOTE_WRITE 0x0002 /* data contents changed */ #define NOTE_EXTEND 0x0004 /* size increased */ #define NOTE_ATTRIB 0x0008 /* attributes changed */ #define NOTE_LINK 0x0010 /* link count changed */ #define NOTE_RENAME 0x0020 /* vnode was renamed */ #define NOTE_REVOKE 0x0040 /* vnode access was revoked */ #define NOTE_OPEN 0x0080 /* vnode was opened */ #define NOTE_CLOSE 0x0100 /* file closed, fd did not allowed write */ #define NOTE_CLOSE_WRITE 0x0200 /* file closed, fd did allowed write */ #define NOTE_READ 0x0400 /* file was read */ /* * data/hint flags for EVFILT_PROC and EVFILT_PROCDESC, shared with userspace */ #define NOTE_EXIT 0x80000000 /* process exited */ #define NOTE_FORK 0x40000000 /* process forked */ #define NOTE_EXEC 0x20000000 /* process exec'd */ #define NOTE_PCTRLMASK 0xf0000000 /* mask for hint bits */ #define NOTE_PDATAMASK 0x000fffff /* mask for pid */ /* additional flags for EVFILT_PROC */ #define NOTE_TRACK 0x00000001 /* follow across forks */ #define NOTE_TRACKERR 0x00000002 /* could not track child */ #define NOTE_CHILD 0x00000004 /* am a child process */ /* additional flags for EVFILT_TIMER */ #define NOTE_SECONDS 0x00000001 /* data is seconds */ #define NOTE_MSECONDS 0x00000002 /* data is milliseconds */ #define NOTE_USECONDS 0x00000004 /* data is microseconds */ #define NOTE_NSECONDS 0x00000008 /* data is nanoseconds */ struct knote; SLIST_HEAD(klist, knote); struct kqueue; TAILQ_HEAD(kqlist, kqueue); struct knlist { struct klist kl_list; void (*kl_lock)(void *); /* lock function */ void (*kl_unlock)(void *); void (*kl_assert_locked)(void *); void (*kl_assert_unlocked)(void *); - void *kl_lockarg; /* argument passed to kl_lockf() */ + void *kl_lockarg; /* argument passed to lock functions */ + int kl_autodestroy; }; #ifdef _KERNEL /* * Flags for knote call */ #define KNF_LISTLOCKED 0x0001 /* knlist is locked */ #define KNF_NOKQLOCK 0x0002 /* do not keep KQ_LOCK */ #define KNOTE(list, hist, flags) knote(list, hist, flags) #define KNOTE_LOCKED(list, hint) knote(list, hint, KNF_LISTLOCKED) #define KNOTE_UNLOCKED(list, hint) knote(list, hint, 0) #define KNLIST_EMPTY(list) SLIST_EMPTY(&(list)->kl_list) /* * Flag indicating hint is a signal. Used by EVFILT_SIGNAL, and also * shared by EVFILT_PROC (all knotes attached to p->p_klist) */ #define NOTE_SIGNAL 0x08000000 /* * Hint values for the optional f_touch event filter. If f_touch is not set * to NULL and f_isfd is zero the f_touch filter will be called with the type * argument set to EVENT_REGISTER during a kevent() system call. It is also * called under the same conditions with the type argument set to EVENT_PROCESS * when the event has been triggered. */ #define EVENT_REGISTER 1 #define EVENT_PROCESS 2 struct filterops { int f_isfd; /* true if ident == filedescriptor */ int (*f_attach)(struct knote *kn); void (*f_detach)(struct knote *kn); int (*f_event)(struct knote *kn, long hint); void (*f_touch)(struct knote *kn, struct kevent *kev, u_long type); }; /* * Setting the KN_INFLUX flag enables you to unlock the kq that this knote * is on, and modify kn_status as if you had the KQ lock. * * kn_sfflags, kn_sdata, and kn_kevent are protected by the knlist lock. */ struct knote { SLIST_ENTRY(knote) kn_link; /* for kq */ SLIST_ENTRY(knote) kn_selnext; /* for struct selinfo */ struct knlist *kn_knlist; /* f_attach populated */ TAILQ_ENTRY(knote) kn_tqe; struct kqueue *kn_kq; /* which queue we are on */ struct kevent kn_kevent; int kn_status; /* protected by kq lock */ #define KN_ACTIVE 0x01 /* event has been triggered */ #define KN_QUEUED 0x02 /* event is on queue */ #define KN_DISABLED 0x04 /* event is disabled */ #define KN_DETACHED 0x08 /* knote is detached */ #define KN_INFLUX 0x10 /* knote is in flux */ #define KN_MARKER 0x20 /* ignore this knote */ #define KN_KQUEUE 0x40 /* this knote belongs to a kq */ #define KN_HASKQLOCK 0x80 /* for _inevent */ #define KN_SCAN 0x100 /* flux set in kqueue_scan() */ int kn_sfflags; /* saved filter flags */ intptr_t kn_sdata; /* saved data field */ union { struct file *p_fp; /* file data pointer */ struct proc *p_proc; /* proc pointer */ struct kaiocb *p_aio; /* AIO job pointer */ struct aioliojob *p_lio; /* LIO job pointer */ sbintime_t *p_nexttime; /* next timer event fires at */ void *p_v; /* generic other pointer */ } kn_ptr; struct filterops *kn_fop; void *kn_hook; int kn_hookid; #define kn_id kn_kevent.ident #define kn_filter kn_kevent.filter #define kn_flags kn_kevent.flags #define kn_fflags kn_kevent.fflags #define kn_data kn_kevent.data #define kn_fp kn_ptr.p_fp }; struct kevent_copyops { void *arg; int (*k_copyout)(void *arg, struct kevent *kevp, int count); int (*k_copyin)(void *arg, struct kevent *kevp, int count); }; struct thread; struct proc; struct knlist; struct mtx; struct rwlock; extern void knote(struct knlist *list, long hint, int lockflags); extern void knote_fork(struct knlist *list, int pid); +extern struct knlist *knlist_alloc(struct mtx *lock); +extern void knlist_detach(struct knlist *knl); extern void knlist_add(struct knlist *knl, struct knote *kn, int islocked); extern void knlist_remove(struct knlist *knl, struct knote *kn, int islocked); -extern void knlist_remove_inevent(struct knlist *knl, struct knote *kn); extern int knlist_empty(struct knlist *knl); extern void knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), void (*kl_unlock)(void *), void (*kl_assert_locked)(void *), void (*kl_assert_unlocked)(void *)); extern void knlist_init_mtx(struct knlist *knl, struct mtx *lock); extern void knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock); extern void knlist_destroy(struct knlist *knl); extern void knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn); #define knlist_clear(knl, islocked) \ knlist_cleardel((knl), NULL, (islocked), 0) #define knlist_delete(knl, td, islocked) \ knlist_cleardel((knl), (td), (islocked), 1) extern void knote_fdclose(struct thread *p, int fd); extern int kqfd_register(int fd, struct kevent *kev, struct thread *p, int waitok); extern int kqueue_add_filteropts(int filt, struct filterops *filtops); extern int kqueue_del_filteropts(int filt); #else /* !_KERNEL */ #include struct timespec; __BEGIN_DECLS int kqueue(void); int kevent(int kq, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); __END_DECLS #endif /* !_KERNEL */ #endif /* !_SYS_EVENT_H_ */ Index: projects/vnet/sys/sys/proc.h =================================================================== --- projects/vnet/sys/sys/proc.h (revision 302276) +++ projects/vnet/sys/sys/proc.h (revision 302277) @@ -1,1100 +1,1100 @@ /*- * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)proc.h 8.15 (Berkeley) 5/19/95 * $FreeBSD$ */ #ifndef _SYS_PROC_H_ #define _SYS_PROC_H_ #include /* For struct callout. */ #include /* For struct klist. */ #include #ifndef _KERNEL #include #endif #include #include #include #include #include #include #include /* XXX. */ #include #include #include #include #include #ifndef _KERNEL #include /* For structs itimerval, timeval. */ #else #include #endif #include #include #include #include /* Machine-dependent proc substruct. */ /* * One structure allocated per session. * * List of locks * (m) locked by s_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct session { u_int s_count; /* Ref cnt; pgrps in session - atomic. */ struct proc *s_leader; /* (m + e) Session leader. */ struct vnode *s_ttyvp; /* (m) Vnode of controlling tty. */ struct cdev_priv *s_ttydp; /* (m) Device of controlling tty. */ struct tty *s_ttyp; /* (e) Controlling tty. */ pid_t s_sid; /* (c) Session ID. */ /* (m) Setlogin() name: */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; struct mtx s_mtx; /* Mutex to protect members. */ }; /* * One structure allocated per process group. * * List of locks * (m) locked by pg_mtx mtx * (e) locked by proctree_lock sx * (c) const until freeing */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* (e) Hash chain. */ LIST_HEAD(, proc) pg_members; /* (m + e) Pointer to pgrp members. */ struct session *pg_session; /* (c) Pointer to session. */ struct sigiolst pg_sigiolst; /* (m) List of sigio sources. */ pid_t pg_id; /* (c) Process group id. */ int pg_jobc; /* (m) Job control process count. */ struct mtx pg_mtx; /* Mutex to protect members */ }; /* * pargs, used to hold a copy of the command line, if it had a sane length. */ struct pargs { u_int ar_ref; /* Reference count. */ u_int ar_length; /* Length. */ u_char ar_args[1]; /* Arguments. */ }; /*- * Description of a process. * * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(CPU)" below, * which might be addressable only on a processor on which the process * is running. * * Below is a key of locks used to protect each member of struct proc. The * lock is indicated by a reference to a specific character in parens in the * associated comment. * * - not yet protected * a - only touched by curproc or parent during fork/wait * b - created at fork, never changes * (exception aiods switch vmspaces, but they are also * marked 'P_SYSTEM' so hopefully it will be left alone) * c - locked by proc mtx * d - locked by allproc_lock lock * e - locked by proctree_lock lock * f - session mtx * g - process group mtx * h - callout_lock mtx * i - by curproc or the master session mtx * j - locked by proc slock * k - only accessed by curthread * k*- only accessed by curthread and from an interrupt * l - the attaching proc or attaching proc parent * m - Giant * n - not locked, lazy * o - ktrace lock * q - td_contested lock * r - p_peers lock * t - thread lock * u - process stat lock * w - process timer lock * x - created at fork, only changes during single threading in exec * y - created at first aio, doesn't change until exit or exec at which * point we are single-threaded and only curthread changes it * z - zombie threads lock * * If the locking key specifies two identifiers (for example, p_pptr) then * either lock is sufficient for read access, but both locks must be held * for write access. */ struct cpuset; struct filecaps; struct filemon; struct kaioinfo; struct kaudit_record; struct kdtrace_proc; struct kdtrace_thread; struct mqueue_notifier; struct nlminfo; struct p_sched; struct proc; struct procdesc; struct racct; struct sbuf; struct sleepqueue; struct syscall_args; struct td_sched; struct thread; struct trapframe; struct turnstile; /* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. The actual times are kept as runtimes and tick counts * (with control info in the "previous" times), and are converted when * userland asks for rusage info. Backwards compatibility prevents putting * this directly in the user-visible rusage struct. * * Locking for p_rux: (cu) means (u) for p_rux and (c) for p_crux. * Locking for td_rux: (t) for all fields. */ struct rusage_ext { uint64_t rux_runtime; /* (cu) Real time. */ uint64_t rux_uticks; /* (cu) Statclock hits in user mode. */ uint64_t rux_sticks; /* (cu) Statclock hits in sys mode. */ uint64_t rux_iticks; /* (cu) Statclock hits in intr mode. */ uint64_t rux_uu; /* (c) Previous user time in usec. */ uint64_t rux_su; /* (c) Previous sys time in usec. */ uint64_t rux_tu; /* (c) Previous total time in usec. */ }; /* * Kernel runnable context (thread). * This is what is put to sleep and reactivated. * Thread context. Processes may have multiple threads. */ struct thread { struct mtx *volatile td_lock; /* replaces sched lock */ struct proc *td_proc; /* (*) Associated process. */ TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ TAILQ_ENTRY(thread) td_runq; /* (t) Run queue. */ TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */ TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */ LIST_ENTRY(thread) td_hash; /* (d) Hash chain. */ struct cpuset *td_cpuset; /* (t) CPU affinity mask. */ struct seltd *td_sel; /* Select queue/channel. */ struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */ struct turnstile *td_turnstile; /* (k) Associated turnstile. */ struct rl_q_entry *td_rlqe; /* (k) Associated range lock entry. */ struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */ struct vm_domain_policy td_vm_dom_policy; /* (c) current numa domain policy */ lwpid_t td_tid; /* (b) Thread ID. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */ #define td_siglist td_sigqueue.sq_signals u_char td_lend_user_pri; /* (t) Lend user pri. */ /* Cleared during fork1() */ #define td_startzero td_flags int td_flags; /* (t) TDF_* flags. */ int td_inhibitors; /* (t) Why can not run. */ int td_pflags; /* (k) Private thread (TDP_*) flags. */ int td_dupfd; /* (k) Ret value from fdopen. XXX */ int td_sqqueue; /* (t) Sleepqueue queue blocked on. */ void *td_wchan; /* (t) Sleep address. */ const char *td_wmesg; /* (t) Reason for sleep. */ volatile u_char td_owepreempt; /* (k*) Preempt on last critical_exit */ u_char td_tsqueue; /* (t) Turnstile queue blocked on. */ short td_locks; /* (k) Debug: count of non-spin locks */ short td_rw_rlocks; /* (k) Count of rwlock read locks. */ short td_lk_slocks; /* (k) Count of lockmgr shared locks. */ short td_stopsched; /* (k) Scheduler stopped. */ struct turnstile *td_blocked; /* (t) Lock thread is blocked on. */ const char *td_lockname; /* (t) Name of lock blocked on. */ LIST_HEAD(, turnstile) td_contested; /* (q) Contested locks. */ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ int td_pinned; /* (k) Temporary cpu pin count. */ struct ucred *td_ucred; /* (k) Reference to credentials. */ struct plimit *td_limit; /* (k) Resource limits. */ int td_slptick; /* (t) Time at sleep. */ int td_blktick; /* (t) Time spent blocked. */ int td_swvoltick; /* (t) Time at last SW_VOL switch. */ int td_swinvoltick; /* (t) Time at last SW_INVOL switch. */ u_int td_cow; /* (*) Number of copy-on-write faults */ struct rusage td_ru; /* (t) rusage information. */ struct rusage_ext td_rux; /* (t) Internal rusage information. */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. */ u_int td_pticks; /* (t) Statclock hits for profiling */ u_int td_sticks; /* (t) Statclock hits in system mode. */ u_int td_iticks; /* (t) Statclock hits in intr mode. */ u_int td_uticks; /* (t) Statclock hits in user mode. */ int td_intrval; /* (t) Return value for sleepq. */ sigset_t td_oldsigmask; /* (k) Saved mask from pre sigpause. */ volatile u_int td_generation; /* (k) For detection of preemption */ stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */ int td_xsig; /* (c) Signal for ptrace */ u_long td_profil_addr; /* (k) Temporary addr until AST. */ u_int td_profil_ticks; /* (k) Temporary ticks until AST. */ char td_name[MAXCOMLEN + 1]; /* (*) Thread name. */ struct file *td_fpop; /* (k) file referencing cdev under op */ int td_dbgflags; /* (c) Userland debugger flags */ struct ksiginfo td_dbgksi; /* (c) ksi reflected to debugger. */ int td_ng_outbound; /* (k) Thread entered ng from above. */ struct osd td_osd; /* (k) Object specific data. */ struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */ pid_t td_dbg_forked; /* (c) Child pid for debugger. */ u_int td_vp_reserv; /* (k) Count of reserved vnodes. */ int td_no_sleeping; /* (k) Sleeping disabled count. */ int td_dom_rr_idx; /* (k) RR Numa domain selection. */ void *td_su; /* (k) FFS SU private */ #define td_endzero td_sigmask /* Copied during fork1() or create_thread(). */ #define td_startcopy td_endzero sigset_t td_sigmask; /* (c) Current signal mask. */ u_char td_rqindex; /* (t) Run queue index. */ u_char td_base_pri; /* (t) Thread base kernel priority. */ u_char td_priority; /* (t) Thread active priority. */ u_char td_pri_class; /* (t) Scheduling class. */ u_char td_user_pri; /* (t) User pri from estcpu and nice. */ u_char td_base_user_pri; /* (t) Base user pri */ u_int td_dbg_sc_code; /* (c) Syscall code to debugger. */ u_int td_dbg_sc_narg; /* (c) Syscall arg count to debugger.*/ uintptr_t td_rb_list; /* (k) Robust list head. */ uintptr_t td_rbp_list; /* (k) Robust priv list head. */ uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */ #define td_endcopy td_pcb /* * Fields that must be manually set in fork1() or create_thread() * or already have been set in the allocator, constructor, etc. */ struct pcb *td_pcb; /* (k) Kernel VA of pcb and kstack. */ enum { TDS_INACTIVE = 0x0, TDS_INHIBITED, TDS_CAN_RUN, TDS_RUNQ, TDS_RUNNING } td_state; /* (t) thread state */ union { register_t tdu_retval[2]; off_t tdu_off; } td_uretoff; /* (k) Syscall aux returns. */ #define td_retval td_uretoff.tdu_retval u_int td_cowgen; /* (k) Generation of COW pointers. */ struct callout td_slpcallout; /* (h) Callout for sleep. */ struct trapframe *td_frame; /* (k) */ struct vm_object *td_kstack_obj;/* (a) Kstack object. */ vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */ int td_kstack_pages; /* (a) Size of the kstack. */ volatile u_int td_critnest; /* (k*) Critical section nest level. */ struct mdthread td_md; /* (k) Any machine-dependent fields. */ struct kaudit_record *td_ar; /* (k) Active audit record, if any. */ struct lpohead td_lprof[2]; /* (a) lock profiling objects. */ struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */ int td_errno; /* Error returned by last syscall. */ struct vnet *td_vnet; /* (k) Effective vnet. */ const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. */ struct trapframe *td_intr_frame;/* (k) Frame of the current irq */ struct proc *td_rfppwait_p; /* (k) The vforked child */ struct vm_page **td_ma; /* (k) uio pages held */ int td_ma_cnt; /* (k) size of *td_ma */ void *td_emuldata; /* Emulator state data */ int td_lastcpu; /* (t) Last cpu we were on. */ int td_oncpu; /* (t) Which cpu we are on. */ }; struct thread0_storage { struct thread t0st_thread; uint64_t t0st_sched[10]; }; struct mtx *thread_lock_block(struct thread *); void thread_lock_unblock(struct thread *, struct mtx *); void thread_lock_set(struct thread *, struct mtx *); #define THREAD_LOCK_ASSERT(td, type) \ do { \ struct mtx *__m = (td)->td_lock; \ if (__m != &blocked_lock) \ mtx_assert(__m, (type)); \ } while (0) #ifdef INVARIANTS #define THREAD_LOCKPTR_ASSERT(td, lock) \ do { \ struct mtx *__m = (td)->td_lock; \ KASSERT((__m == &blocked_lock || __m == (lock)), \ ("Thread %p lock %p does not match %p", td, __m, (lock))); \ } while (0) #define TD_LOCKS_INC(td) ((td)->td_locks++) #define TD_LOCKS_DEC(td) ((td)->td_locks--) #else #define THREAD_LOCKPTR_ASSERT(td, lock) #define TD_LOCKS_INC(td) #define TD_LOCKS_DEC(td) #endif /* * Flags kept in td_flags: * To change these you MUST have the scheduler lock. */ #define TDF_BORROWING 0x00000001 /* Thread is borrowing pri from another. */ #define TDF_INPANIC 0x00000002 /* Caused a panic, let it drive crashdump. */ #define TDF_INMEM 0x00000004 /* Thread's stack is in memory. */ #define TDF_SINTR 0x00000008 /* Sleep is interruptible. */ #define TDF_TIMEOUT 0x00000010 /* Timing out during sleep. */ #define TDF_IDLETD 0x00000020 /* This is a per-CPU idle thread. */ #define TDF_CANSWAP 0x00000040 /* Thread can be swapped. */ #define TDF_SLEEPABORT 0x00000080 /* sleepq_abort was called. */ #define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ #define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */ #define TDF_NEEDRESCHED 0x00010000 /* Thread needs to yield. */ #define TDF_NEEDSIGCHK 0x00020000 /* Thread may need signal delivery. */ #define TDF_NOLOAD 0x00040000 /* Ignore during load avg calculations. */ #define TDF_SERESTART 0x00080000 /* ERESTART on stop attempts. */ #define TDF_THRWAKEUP 0x00100000 /* Libthr thread must not suspend itself. */ #define TDF_SEINTR 0x00200000 /* EINTR on stop attempts. */ #define TDF_SWAPINREQ 0x00400000 /* Swapin request due to wakeup. */ #define TDF_UNUSED23 0x00800000 /* --available-- */ #define TDF_SCHED0 0x01000000 /* Reserved for scheduler private use */ #define TDF_SCHED1 0x02000000 /* Reserved for scheduler private use */ #define TDF_SCHED2 0x04000000 /* Reserved for scheduler private use */ #define TDF_SCHED3 0x08000000 /* Reserved for scheduler private use */ #define TDF_ALRMPEND 0x10000000 /* Pending SIGVTALRM needs to be posted. */ #define TDF_PROFPEND 0x20000000 /* Pending SIGPROF needs to be posted. */ #define TDF_MACPEND 0x40000000 /* AST-based MAC event pending. */ /* Userland debug flags */ #define TDB_SUSPEND 0x00000001 /* Thread is suspended by debugger */ #define TDB_XSIG 0x00000002 /* Thread is exchanging signal under trace */ #define TDB_USERWR 0x00000004 /* Debugger modified memory or registers */ #define TDB_SCE 0x00000008 /* Thread performs syscall enter */ #define TDB_SCX 0x00000010 /* Thread performs syscall exit */ #define TDB_EXEC 0x00000020 /* TDB_SCX from exec(2) family */ #define TDB_FORK 0x00000040 /* TDB_SCX from fork(2) that created new process */ #define TDB_STOPATFORK 0x00000080 /* Stop at the return from fork (child only) */ #define TDB_CHILD 0x00000100 /* New child indicator for ptrace() */ #define TDB_BORN 0x00000200 /* New LWP indicator for ptrace() */ #define TDB_EXIT 0x00000400 /* Exiting LWP indicator for ptrace() */ /* * "Private" flags kept in td_pflags: * These are only written by curthread and thus need no locking. */ #define TDP_OLDMASK 0x00000001 /* Need to restore mask after suspend. */ #define TDP_INKTR 0x00000002 /* Thread is currently in KTR code. */ #define TDP_INKTRACE 0x00000004 /* Thread is currently in KTRACE code. */ #define TDP_BUFNEED 0x00000008 /* Do not recurse into the buf flush */ #define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock acquisition - deadlock treatment. */ #define TDP_NOFAULTING 0x00000080 /* Do not handle page faults. */ #define TDP_UNUSED9 0x00000100 /* --available-- */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */ #define TDP_SYNCIO 0x00000800 /* Local override, disable async i/o. */ #define TDP_SCHED1 0x00001000 /* Reserved for scheduler private use */ #define TDP_SCHED2 0x00002000 /* Reserved for scheduler private use */ #define TDP_SCHED3 0x00004000 /* Reserved for scheduler private use */ #define TDP_SCHED4 0x00008000 /* Reserved for scheduler private use */ #define TDP_GEOM 0x00010000 /* Settle GEOM before finishing syscall */ #define TDP_SOFTDEP 0x00020000 /* Stuck processing softdep worklist */ #define TDP_NORUNNINGBUF 0x00040000 /* Ignore runningbufspace check */ #define TDP_WAKEUP 0x00080000 /* Don't sleep in umtx cond_wait */ #define TDP_INBDFLUSH 0x00100000 /* Already in BO_BDFLUSH, do not recurse */ #define TDP_KTHREAD 0x00200000 /* This is an official kernel thread */ #define TDP_CALLCHAIN 0x00400000 /* Capture thread's callchain */ #define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */ #define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */ #define TDP_RFPPWAIT 0x02000000 /* Handle RFPPWAIT on syscall exit */ #define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */ #define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */ #define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */ #define TDP_FORKING 0x20000000 /* Thread is being created through fork() */ #define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */ /* * Reasons that the current thread can not be run yet. * More than one may apply. */ #define TDI_SUSPENDED 0x0001 /* On suspension queue. */ #define TDI_SLEEPING 0x0002 /* Actually asleep! (tricky). */ #define TDI_SWAPPED 0x0004 /* Stack not in mem. Bad juju if run. */ #define TDI_LOCK 0x0008 /* Stopped on a lock. */ #define TDI_IWAIT 0x0010 /* Awaiting interrupt. */ #define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING) #define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL) #define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED) #define TD_IS_SWAPPED(td) ((td)->td_inhibitors & TDI_SWAPPED) #define TD_ON_LOCK(td) ((td)->td_inhibitors & TDI_LOCK) #define TD_AWAITING_INTR(td) ((td)->td_inhibitors & TDI_IWAIT) #define TD_IS_RUNNING(td) ((td)->td_state == TDS_RUNNING) #define TD_ON_RUNQ(td) ((td)->td_state == TDS_RUNQ) #define TD_CAN_RUN(td) ((td)->td_state == TDS_CAN_RUN) #define TD_IS_INHIBITED(td) ((td)->td_state == TDS_INHIBITED) #define TD_ON_UPILOCK(td) ((td)->td_flags & TDF_UPIBLOCKED) #define TD_IS_IDLETHREAD(td) ((td)->td_flags & TDF_IDLETD) #define TD_SET_INHIB(td, inhib) do { \ (td)->td_state = TDS_INHIBITED; \ (td)->td_inhibitors |= (inhib); \ } while (0) #define TD_CLR_INHIB(td, inhib) do { \ if (((td)->td_inhibitors & (inhib)) && \ (((td)->td_inhibitors &= ~(inhib)) == 0)) \ (td)->td_state = TDS_CAN_RUN; \ } while (0) #define TD_SET_SLEEPING(td) TD_SET_INHIB((td), TDI_SLEEPING) #define TD_SET_SWAPPED(td) TD_SET_INHIB((td), TDI_SWAPPED) #define TD_SET_LOCK(td) TD_SET_INHIB((td), TDI_LOCK) #define TD_SET_SUSPENDED(td) TD_SET_INHIB((td), TDI_SUSPENDED) #define TD_SET_IWAIT(td) TD_SET_INHIB((td), TDI_IWAIT) #define TD_SET_EXITING(td) TD_SET_INHIB((td), TDI_EXITING) #define TD_CLR_SLEEPING(td) TD_CLR_INHIB((td), TDI_SLEEPING) #define TD_CLR_SWAPPED(td) TD_CLR_INHIB((td), TDI_SWAPPED) #define TD_CLR_LOCK(td) TD_CLR_INHIB((td), TDI_LOCK) #define TD_CLR_SUSPENDED(td) TD_CLR_INHIB((td), TDI_SUSPENDED) #define TD_CLR_IWAIT(td) TD_CLR_INHIB((td), TDI_IWAIT) #define TD_SET_RUNNING(td) (td)->td_state = TDS_RUNNING #define TD_SET_RUNQ(td) (td)->td_state = TDS_RUNQ #define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN /* * Process structure. */ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ TAILQ_HEAD(, thread) p_threads; /* (c) all threads. */ struct mtx p_slock; /* process spin lock */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. */ struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */ struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ struct plimit *p_limit; /* (c) Resource limits. */ struct callout p_limco; /* (c) Limit callout handle */ struct sigacts *p_sigacts; /* (x) Signal actions, state (CPU). */ int p_flag; /* (c) P_* flags. */ int p_flag2; /* (c) P2_* flags. */ enum { PRS_NEW = 0, /* In creation */ PRS_NORMAL, /* threads can be run. */ PRS_ZOMBIE } p_state; /* (j/c) Process status. */ pid_t p_pid; /* (b) Process identifier. */ LIST_ENTRY(proc) p_hash; /* (d) Hash chain. */ LIST_ENTRY(proc) p_pglist; /* (g + e) List of processes in pgrp. */ struct proc *p_pptr; /* (c + e) Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ struct proc *p_reaper; /* (e) My reaper. */ LIST_HEAD(, proc) p_reaplist; /* (e) List of my descendants (if I am reaper). */ LIST_ENTRY(proc) p_reapsibling; /* (e) List of siblings - descendants of the same reaper. */ struct mtx p_mtx; /* (n) Lock for this struct. */ struct mtx p_statmtx; /* Lock for the stats */ struct mtx p_itimmtx; /* Lock for the virt/prof timers */ struct mtx p_profmtx; /* Lock for the profiling */ struct ksiginfo *p_ksi; /* Locked by parent proc lock */ sigqueue_t p_sigqueue; /* (c) Sigs not delivered to a td. */ #define p_siglist p_sigqueue.sq_signals /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_oppid pid_t p_oppid; /* (c + e) Save ppid in ptrace. XXX */ struct vmspace *p_vmspace; /* (b) Address space. */ u_int p_swtick; /* (c) Tick when swapped in or out. */ u_int p_cowgen; /* (c) Generation of COW pointers. */ struct itimerval p_realtimer; /* (c) Alarm timer. */ struct rusage p_ru; /* (a) Exit information. */ struct rusage_ext p_rux; /* (cu) Internal resource usage. */ struct rusage_ext p_crux; /* (c) Internal child resource usage. */ int p_profthreads; /* (c) Num threads in addupc_task. */ volatile int p_exitthreads; /* (j) Number of threads exiting */ int p_traceflag; /* (o) Kernel trace points. */ struct vnode *p_tracevp; /* (c + o) Trace to vnode. */ struct ucred *p_tracecred; /* (o) Credentials to trace with. */ struct vnode *p_textvp; /* (b) Vnode of executable. */ u_int p_lock; /* (c) Proclock (prevent swap) count. */ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */ int p_sigparent; /* (c) Signal to parent on exit. */ int p_sig; /* (n) For core dump/debugger XXX. */ u_long p_code; /* (n) For core dump/debugger XXX. */ u_int p_stops; /* (c) Stop event bitmask. */ u_int p_stype; /* (c) Stop event type. */ char p_step; /* (c) Process is stopped. */ u_char p_pfsflags; /* (c) Procfs flags. */ struct nlminfo *p_nlminfo; /* (?) Only used by/for lockd. */ struct kaioinfo *p_aioinfo; /* (y) ASYNC I/O info. */ struct thread *p_singlethread;/* (c + j) If single threading this is it */ int p_suspcount; /* (j) Num threads in suspended mode. */ struct thread *p_xthread; /* (c) Trap thread */ int p_boundary_count;/* (j) Num threads at user boundary */ int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ struct procdesc *p_procdesc; /* (e) Process descriptor, if any. */ u_int p_treeflag; /* (e) P_TREE flags */ int p_pendingexits; /* (c) Count of pending thread exits. */ struct filemon *p_filemon; /* (c) filemon-specific data. */ /* End area that is zeroed on creation. */ #define p_endzero p_magic /* The following fields are all copied upon creation in fork. */ #define p_startcopy p_endzero u_int p_magic; /* (b) Magic number. */ int p_osrel; /* (x) osreldate for the binary (from ELF note, if any) */ char p_comm[MAXCOMLEN + 1]; /* (b) Process name. */ struct sysentvec *p_sysent; /* (b) Syscall dispatch info. */ struct pargs *p_args; /* (c) Process arguments. */ rlim_t p_cpulimit; /* (c) Current CPU limit in seconds. */ signed char p_nice; /* (c) Process "nice" value. */ int p_fibnum; /* in this routing domain XXX MRT */ pid_t p_reapsubtree; /* (e) Pid of the direct child of the reaper which spawned our subtree. */ u_int p_xexit; /* (c) Exit code. */ u_int p_xsig; /* (c) Stop/kill sig. */ /* End area that is copied on creation. */ #define p_endcopy p_xsig struct pgrp *p_pgrp; /* (c + e) Pointer to process group. */ - struct knlist p_klist; /* (c) Knotes attached to this proc. */ + struct knlist *p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (c) Number of threads. */ struct mdproc p_md; /* Any machine-dependent fields. */ struct callout p_itcallout; /* (h + c) Interval timer callout. */ u_short p_acflag; /* (c) Accounting flags. */ struct proc *p_peers; /* (r) */ struct proc *p_leader; /* (b) */ void *p_emuldata; /* (c) Emulator state data. */ struct label *p_label; /* (*) Proc (not subject) MAC label. */ STAILQ_HEAD(, ktr_request) p_ktr; /* (o) KTR event queue. */ LIST_HEAD(, mqueue_notifier) p_mqnotifier; /* (c) mqueue notifiers.*/ struct kdtrace_proc *p_dtrace; /* (*) DTrace-specific data. */ struct cv p_pwait; /* (*) wait cv for exit/exec. */ struct cv p_dbgwait; /* (*) wait cv for debugger attach after fork. */ uint64_t p_prev_runtime; /* (c) Resource usage accounting. */ struct racct *p_racct; /* (b) Resource accounting. */ int p_throttled; /* (c) Flag for racct pcpu throttling */ struct vm_domain_policy p_vm_dom_policy; /* (c) process default VM domain, or -1 */ /* * An orphan is the child that has beed re-parented to the * debugger as a result of attaching to it. Need to keep * track of them for parent to be able to collect the exit * status of what used to be children. */ LIST_ENTRY(proc) p_orphan; /* (e) List of orphan processes. */ LIST_HEAD(, proc) p_orphans; /* (e) Pointer to list of orphans. */ }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id #define NOCPU (-1) /* For when we aren't on a CPU. */ #define NOCPU_OLD (255) #define MAXCPU_OLD (254) #define PROC_SLOCK(p) mtx_lock_spin(&(p)->p_slock) #define PROC_SUNLOCK(p) mtx_unlock_spin(&(p)->p_slock) #define PROC_SLOCK_ASSERT(p, type) mtx_assert(&(p)->p_slock, (type)) #define PROC_STATLOCK(p) mtx_lock_spin(&(p)->p_statmtx) #define PROC_STATUNLOCK(p) mtx_unlock_spin(&(p)->p_statmtx) #define PROC_STATLOCK_ASSERT(p, type) mtx_assert(&(p)->p_statmtx, (type)) #define PROC_ITIMLOCK(p) mtx_lock_spin(&(p)->p_itimmtx) #define PROC_ITIMUNLOCK(p) mtx_unlock_spin(&(p)->p_itimmtx) #define PROC_ITIMLOCK_ASSERT(p, type) mtx_assert(&(p)->p_itimmtx, (type)) #define PROC_PROFLOCK(p) mtx_lock_spin(&(p)->p_profmtx) #define PROC_PROFUNLOCK(p) mtx_unlock_spin(&(p)->p_profmtx) #define PROC_PROFLOCK_ASSERT(p, type) mtx_assert(&(p)->p_profmtx, (type)) /* These flags are kept in p_flag. */ #define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ #define P_CONTROLT 0x00002 /* Has a controlling terminal. */ #define P_KPROC 0x00004 /* Kernel process. */ #define P_FOLLOWFORK 0x00008 /* Attach parent debugger to children. */ #define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */ #define P_PROFIL 0x00020 /* Has started profiling. */ #define P_STOPPROF 0x00040 /* Has thread requesting to stop profiling. */ #define P_HADTHREADS 0x00080 /* Has had threads (no cleanup shortcuts) */ #define P_SUGID 0x00100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ #define P_SINGLE_EXIT 0x00400 /* Threads suspending should exit, not wait. */ #define P_TRACED 0x00800 /* Debugged process being traced. */ #define P_WAITED 0x01000 /* Someone is waiting for us. */ #define P_WEXIT 0x02000 /* Working on exiting. */ #define P_EXEC 0x04000 /* Process called exec. */ #define P_WKILLED 0x08000 /* Killed, go to kernel/user boundary ASAP. */ #define P_CONTINUED 0x10000 /* Proc has continued from a stopped state. */ #define P_STOPPED_SIG 0x20000 /* Stopped due to SIGSTOP/SIGTSTP. */ #define P_STOPPED_TRACE 0x40000 /* Stopped because of tracing. */ #define P_STOPPED_SINGLE 0x80000 /* Only 1 thread can continue (not to user). */ #define P_PROTECTED 0x100000 /* Do not kill on memory overcommit. */ #define P_SIGEVENT 0x200000 /* Process pending signals changed. */ #define P_SINGLE_BOUNDARY 0x400000 /* Threads should suspend at user boundary. */ #define P_HWPMC 0x800000 /* Process is using HWPMCs */ #define P_JAILED 0x1000000 /* Process is in jail. */ #define P_TOTAL_STOP 0x2000000 /* Stopped in stop_all_proc. */ #define P_INEXEC 0x4000000 /* Process is in execve(). */ #define P_STATCHILD 0x8000000 /* Child process stopped or exited. */ #define P_INMEM 0x10000000 /* Loaded into memory. */ #define P_SWAPPINGOUT 0x20000000 /* Process is being swapped out. */ #define P_SWAPPINGIN 0x40000000 /* Process is being swapped in. */ #define P_PPTRACE 0x80000000 /* PT_TRACEME by vforked child. */ #define P_STOPPED (P_STOPPED_SIG|P_STOPPED_SINGLE|P_STOPPED_TRACE) #define P_SHOULDSTOP(p) ((p)->p_flag & P_STOPPED) #define P_KILLED(p) ((p)->p_flag & P_WKILLED) /* These flags are kept in p_flag2. */ #define P2_INHERIT_PROTECTED 0x00000001 /* New children get P_PROTECTED. */ #define P2_NOTRACE 0x00000002 /* No ptrace(2) attach or coredumps. */ #define P2_NOTRACE_EXEC 0x00000004 /* Keep P2_NOPTRACE on exec(2). */ #define P2_AST_SU 0x00000008 /* Handles SU ast for kthreads. */ #define P2_LWP_EVENTS 0x00000010 /* Report LWP events via ptrace(2). */ /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ #define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan list */ #define P_TREE_REAPER 0x00000004 /* Reaper of subtree */ /* * These were process status values (p_stat), now they are only used in * legacy conversion code. */ #define SIDL 1 /* Process being created by fork. */ #define SRUN 2 /* Currently runnable. */ #define SSLEEP 3 /* Sleeping on an address. */ #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ #define SWAIT 6 /* Waiting for interrupt. */ #define SLOCK 7 /* Blocked on a lock. */ #define P_MAGIC 0xbeefface #ifdef _KERNEL /* Types and flags for mi_switch(). */ #define SW_TYPE_MASK 0xff /* First 8 bits are switch type */ #define SWT_NONE 0 /* Unspecified switch. */ #define SWT_PREEMPT 1 /* Switching due to preemption. */ #define SWT_OWEPREEMPT 2 /* Switching due to opepreempt. */ #define SWT_TURNSTILE 3 /* Turnstile contention. */ #define SWT_SLEEPQ 4 /* Sleepq wait. */ #define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */ #define SWT_RELINQUISH 6 /* yield call. */ #define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */ #define SWT_IDLE 8 /* Switching from the idle thread. */ #define SWT_IWAIT 9 /* Waiting for interrupts. */ #define SWT_SUSPEND 10 /* Thread suspended. */ #define SWT_REMOTEPREEMPT 11 /* Remote processor preempted. */ #define SWT_REMOTEWAKEIDLE 12 /* Remote processor preempted idle. */ #define SWT_COUNT 13 /* Number of switch types. */ /* Flags */ #define SW_VOL 0x0100 /* Voluntary switch. */ #define SW_INVOL 0x0200 /* Involuntary switch. */ #define SW_PREEMPT 0x0400 /* The invol switch is a preemption */ /* How values for thread_single(). */ #define SINGLE_NO_EXIT 0 #define SINGLE_EXIT 1 #define SINGLE_BOUNDARY 2 #define SINGLE_ALLPROC 3 #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_PARGS); MALLOC_DECLARE(M_PGRP); MALLOC_DECLARE(M_SESSION); MALLOC_DECLARE(M_SUBPROC); #endif #define FOREACH_PROC_IN_SYSTEM(p) \ LIST_FOREACH((p), &allproc, p_list) #define FOREACH_THREAD_IN_PROC(p, td) \ TAILQ_FOREACH((td), &(p)->p_threads, td_plist) #define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads) /* * We use process IDs <= pid_max <= PID_MAX; PID_MAX + 1 must also fit * in a pid_t, as it is used to represent "no process group". */ #define PID_MAX 99999 #define NO_PID 100000 extern pid_t pid_max; #define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) #define STOPEVENT(p, e, v) do { \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, \ "checking stopevent %d", (e)); \ if ((p)->p_stops & (e)) { \ PROC_LOCK(p); \ stopevent((p), (e), (v)); \ PROC_UNLOCK(p); \ } \ } while (0) #define _STOPEVENT(p, e, v) do { \ PROC_LOCK_ASSERT(p, MA_OWNED); \ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, &p->p_mtx.lock_object, \ "checking stopevent %d", (e)); \ if ((p)->p_stops & (e)) \ stopevent((p), (e), (v)); \ } while (0) /* Lock and unlock a process. */ #define PROC_LOCK(p) mtx_lock(&(p)->p_mtx) #define PROC_TRYLOCK(p) mtx_trylock(&(p)->p_mtx) #define PROC_UNLOCK(p) mtx_unlock(&(p)->p_mtx) #define PROC_LOCKED(p) mtx_owned(&(p)->p_mtx) #define PROC_LOCK_ASSERT(p, type) mtx_assert(&(p)->p_mtx, (type)) /* Lock and unlock a process group. */ #define PGRP_LOCK(pg) mtx_lock(&(pg)->pg_mtx) #define PGRP_UNLOCK(pg) mtx_unlock(&(pg)->pg_mtx) #define PGRP_LOCKED(pg) mtx_owned(&(pg)->pg_mtx) #define PGRP_LOCK_ASSERT(pg, type) mtx_assert(&(pg)->pg_mtx, (type)) #define PGRP_LOCK_PGSIGNAL(pg) do { \ if ((pg) != NULL) \ PGRP_LOCK(pg); \ } while (0) #define PGRP_UNLOCK_PGSIGNAL(pg) do { \ if ((pg) != NULL) \ PGRP_UNLOCK(pg); \ } while (0) /* Lock and unlock a session. */ #define SESS_LOCK(s) mtx_lock(&(s)->s_mtx) #define SESS_UNLOCK(s) mtx_unlock(&(s)->s_mtx) #define SESS_LOCKED(s) mtx_owned(&(s)->s_mtx) #define SESS_LOCK_ASSERT(s, type) mtx_assert(&(s)->s_mtx, (type)) /* * Non-zero p_lock ensures that: * - exit1() is not performed until p_lock reaches zero; * - the process' threads stack are not swapped out if they are currently * not (P_INMEM). * * PHOLD() asserts that the process (except the current process) is * not exiting, increments p_lock and swaps threads stacks into memory, * if needed. * _PHOLD() is same as PHOLD(), it takes the process locked. * _PHOLD_LITE() also takes the process locked, but comparing with * _PHOLD(), it only guarantees that exit1() is not executed, * faultin() is not called. */ #define PHOLD(p) do { \ PROC_LOCK(p); \ _PHOLD(p); \ PROC_UNLOCK(p); \ } while (0) #define _PHOLD(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \ ("PHOLD of exiting process %p", p)); \ (p)->p_lock++; \ if (((p)->p_flag & P_INMEM) == 0) \ faultin((p)); \ } while (0) #define _PHOLD_LITE(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ KASSERT(!((p)->p_flag & P_WEXIT) || (p) == curproc, \ ("PHOLD of exiting process %p", p)); \ (p)->p_lock++; \ } while (0) #define PROC_ASSERT_HELD(p) do { \ KASSERT((p)->p_lock > 0, ("process %p not held", p)); \ } while (0) #define PRELE(p) do { \ PROC_LOCK((p)); \ _PRELE((p)); \ PROC_UNLOCK((p)); \ } while (0) #define _PRELE(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ PROC_ASSERT_HELD(p); \ (--(p)->p_lock); \ if (((p)->p_flag & P_WEXIT) && (p)->p_lock == 0) \ wakeup(&(p)->p_lock); \ } while (0) #define PROC_ASSERT_NOT_HELD(p) do { \ KASSERT((p)->p_lock == 0, ("process %p held", p)); \ } while (0) #define PROC_UPDATE_COW(p) do { \ PROC_LOCK_ASSERT((p), MA_OWNED); \ (p)->p_cowgen++; \ } while (0) /* Check whether a thread is safe to be swapped out. */ #define thread_safetoswapout(td) ((td)->td_flags & TDF_CANSWAP) /* Control whether or not it is safe for curthread to sleep. */ #define THREAD_NO_SLEEPING() ((curthread)->td_no_sleeping++) #define THREAD_SLEEPING_OK() ((curthread)->td_no_sleeping--) #define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0) #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; extern u_long pidhash; #define TIDHASH(tid) (&tidhashtbl[(tid) & tidhash]) extern LIST_HEAD(tidhashhead, thread) *tidhashtbl; extern u_long tidhash; extern struct rwlock tidhash_lock; #define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; extern u_long pgrphash; extern struct sx allproc_lock; extern int allproc_gen; extern struct sx proctree_lock; extern struct mtx ppeers_lock; extern struct proc proc0; /* Process slot for swapper. */ extern struct thread0_storage thread0_st; /* Primary thread in proc0. */ #define thread0 (thread0_st.t0st_thread) extern struct vmspace vmspace0; /* VM space for proc0. */ extern int hogticks; /* Limit on kernel cpu hogs. */ extern int lastpid; extern int nprocs, maxproc; /* Current and max number of procs. */ extern int maxprocperuid; /* Max procs per uid. */ extern u_long ps_arg_cache_limit; LIST_HEAD(proclist, proc); TAILQ_HEAD(procqueue, proc); TAILQ_HEAD(threadqueue, thread); extern struct proclist allproc; /* List of all processes. */ extern struct proclist zombproc; /* List of zombie processes. */ extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */ extern struct uma_zone *proc_zone; struct proc *pfind(pid_t); /* Find process by id. */ struct proc *pfind_locked(pid_t pid); struct pgrp *pgfind(pid_t); /* Find process group by id. */ struct proc *zpfind(pid_t); /* Find zombie process by id. */ struct fork_req { int fr_flags; int fr_pages; int *fr_pidp; struct proc **fr_procp; int *fr_pd_fd; int fr_pd_flags; struct filecaps *fr_pd_fcaps; }; /* * pget() flags. */ #define PGET_HOLD 0x00001 /* Hold the process. */ #define PGET_CANSEE 0x00002 /* Check against p_cansee(). */ #define PGET_CANDEBUG 0x00004 /* Check against p_candebug(). */ #define PGET_ISCURRENT 0x00008 /* Check that the found process is current. */ #define PGET_NOTWEXIT 0x00010 /* Check that the process is not in P_WEXIT. */ #define PGET_NOTINEXEC 0x00020 /* Check that the process is not in P_INEXEC. */ #define PGET_NOTID 0x00040 /* Do not assume tid if pid > PID_MAX. */ #define PGET_WANTREAD (PGET_HOLD | PGET_CANDEBUG | PGET_NOTWEXIT) int pget(pid_t pid, int flags, struct proc **pp); void ast(struct trapframe *framep); struct thread *choosethread(void); int cr_cansignal(struct ucred *cred, struct proc *proc, int signum); int enterpgrp(struct proc *p, pid_t pgid, struct pgrp *pgrp, struct session *sess); int enterthispgrp(struct proc *p, struct pgrp *pgrp); void faultin(struct proc *p); void fixjobc(struct proc *p, struct pgrp *pgrp, int entering); int fork1(struct thread *, struct fork_req *); void fork_exit(void (*)(void *, struct trapframe *), void *, struct trapframe *); void fork_return(struct thread *, struct trapframe *); int inferior(struct proc *p); void kern_yield(int); void kick_proc0(void); void killjobc(void); int leavepgrp(struct proc *p); int maybe_preempt(struct thread *td); void maybe_yield(void); void mi_switch(int flags, struct thread *newtd); int p_candebug(struct thread *td, struct proc *p); int p_cansee(struct thread *td, struct proc *p); int p_cansched(struct thread *td, struct proc *p); int p_cansignal(struct thread *td, struct proc *p, int signum); int p_canwait(struct thread *td, struct proc *p); struct pargs *pargs_alloc(int len); void pargs_drop(struct pargs *pa); void pargs_hold(struct pargs *pa); int proc_getargv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getauxv(struct thread *td, struct proc *p, struct sbuf *sb); int proc_getenvv(struct thread *td, struct proc *p, struct sbuf *sb); void procinit(void); void proc_linkup0(struct proc *p, struct thread *td); void proc_linkup(struct proc *p, struct thread *td); struct proc *proc_realparent(struct proc *child); void proc_reap(struct thread *td, struct proc *p, int *status, int options); void proc_reparent(struct proc *child, struct proc *newparent); struct pstats *pstats_alloc(void); void pstats_fork(struct pstats *src, struct pstats *dst); void pstats_free(struct pstats *ps); void reaper_abandon_children(struct proc *p, bool exiting); int securelevel_ge(struct ucred *cr, int level); int securelevel_gt(struct ucred *cr, int level); void sess_hold(struct session *); void sess_release(struct session *); int setrunnable(struct thread *); void setsugid(struct proc *p); int should_yield(void); int sigonstack(size_t sp); void stopevent(struct proc *, u_int, u_int); struct thread *tdfind(lwpid_t, pid_t); void threadinit(void); void tidhash_add(struct thread *); void tidhash_remove(struct thread *); void cpu_idle(int); int cpu_idle_wakeup(int); extern void (*cpu_idle_hook)(sbintime_t); /* Hook to machdep CPU idler. */ void cpu_switch(struct thread *, struct thread *, struct mtx *); void cpu_throw(struct thread *, struct thread *) __dead2; void unsleep(struct thread *); void userret(struct thread *, struct trapframe *); void cpu_exit(struct thread *); void exit1(struct thread *, int, int) __dead2; void cpu_copy_thread(struct thread *td, struct thread *td0); int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa); void cpu_fork(struct thread *, struct proc *, struct thread *, int); void cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *); void cpu_set_syscall_retval(struct thread *, int); void cpu_set_upcall(struct thread *, void (*)(void *), void *, stack_t *); int cpu_set_user_tls(struct thread *, void *tls_base); void cpu_thread_alloc(struct thread *); void cpu_thread_clean(struct thread *); void cpu_thread_exit(struct thread *); void cpu_thread_free(struct thread *); void cpu_thread_swapin(struct thread *); void cpu_thread_swapout(struct thread *); struct thread *thread_alloc(int pages); int thread_alloc_stack(struct thread *, int pages); void thread_cow_get_proc(struct thread *newtd, struct proc *p); void thread_cow_get(struct thread *newtd, struct thread *td); void thread_cow_free(struct thread *td); void thread_cow_update(struct thread *td); int thread_create(struct thread *td, struct rtprio *rtp, int (*initialize_thread)(struct thread *, void *), void *thunk); void thread_exit(void) __dead2; void thread_free(struct thread *td); void thread_link(struct thread *td, struct proc *p); void thread_reap(void); int thread_single(struct proc *p, int how); void thread_single_end(struct proc *p, int how); void thread_stash(struct thread *td); void thread_stopped(struct proc *p); void childproc_stopped(struct proc *child, int reason); void childproc_continued(struct proc *child); void childproc_exited(struct proc *child); int thread_suspend_check(int how); bool thread_suspend_check_needed(void); void thread_suspend_switch(struct thread *, struct proc *p); void thread_suspend_one(struct thread *td); void thread_unlink(struct thread *td); void thread_unsuspend(struct proc *p); void thread_wait(struct proc *p); struct thread *thread_find(struct proc *p, lwpid_t tid); void stop_all_proc(void); void resume_all_proc(void); static __inline int curthread_pflags_set(int flags) { struct thread *td; int save; td = curthread; save = ~flags | (td->td_pflags & flags); td->td_pflags |= flags; return (save); } static __inline void curthread_pflags_restore(int save) { curthread->td_pflags &= save; } static __inline __pure2 struct td_sched * td_get_sched(struct thread *td) { return ((struct td_sched *)&td[1]); } #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */ Index: projects/vnet/sys/vm/vm_fault.c =================================================================== --- projects/vnet/sys/vm/vm_fault.c (revision 302276) +++ projects/vnet/sys/vm/vm_fault.c (revision 302277) @@ -1,1450 +1,1457 @@ /*- * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Page fault handling module. */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #include #include #define PFBAK 4 #define PFFOR 4 #define VM_FAULT_READ_DEFAULT (1 + VM_FAULT_READ_AHEAD_INIT) #define VM_FAULT_READ_MAX (1 + VM_FAULT_READ_AHEAD_MAX) #define VM_FAULT_DONTNEED_MIN 1048576 struct faultstate { vm_page_t m; vm_object_t object; vm_pindex_t pindex; vm_page_t first_m; vm_object_t first_object; vm_pindex_t first_pindex; vm_map_t map; vm_map_entry_t entry; int lookup_still_valid; struct vnode *vp; }; static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead); static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, int backward, int forward); static inline void release_page(struct faultstate *fs) { vm_page_xunbusy(fs->m); vm_page_lock(fs->m); vm_page_deactivate(fs->m); vm_page_unlock(fs->m); fs->m = NULL; } static inline void unlock_map(struct faultstate *fs) { if (fs->lookup_still_valid) { vm_map_lookup_done(fs->map, fs->entry); fs->lookup_still_valid = FALSE; } } static void unlock_and_deallocate(struct faultstate *fs) { vm_object_pip_wakeup(fs->object); VM_OBJECT_WUNLOCK(fs->object); if (fs->object != fs->first_object) { VM_OBJECT_WLOCK(fs->first_object); vm_page_lock(fs->first_m); vm_page_free(fs->first_m); vm_page_unlock(fs->first_m); vm_object_pip_wakeup(fs->first_object); VM_OBJECT_WUNLOCK(fs->first_object); fs->first_m = NULL; } vm_object_deallocate(fs->first_object); unlock_map(fs); if (fs->vp != NULL) { vput(fs->vp); fs->vp = NULL; } } static void vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot, vm_prot_t fault_type, int fault_flags, boolean_t set_wd) { boolean_t need_dirty; if (((prot & VM_PROT_WRITE) == 0 && (fault_flags & VM_FAULT_DIRTY) == 0) || (m->oflags & VPO_UNMANAGED) != 0) return; VM_OBJECT_ASSERT_LOCKED(m->object); need_dirty = ((fault_type & VM_PROT_WRITE) != 0 && (fault_flags & VM_FAULT_WIRE) == 0) || (fault_flags & VM_FAULT_DIRTY) != 0; if (set_wd) vm_object_set_writeable_dirty(m->object); else /* * If two callers of vm_fault_dirty() with set_wd == * FALSE, one for the map entry with MAP_ENTRY_NOSYNC * flag set, other with flag clear, race, it is * possible for the no-NOSYNC thread to see m->dirty * != 0 and not clear VPO_NOSYNC. Take vm_page lock * around manipulation of VPO_NOSYNC and * vm_page_dirty() call, to avoid the race and keep * m->oflags consistent. */ vm_page_lock(m); /* * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC * if the page is already dirty to prevent data written with * the expectation of being synced from not being synced. * Likewise if this entry does not request NOSYNC then make * sure the page isn't marked NOSYNC. Applications sharing * data should use the same flags to avoid ping ponging. */ if ((entry->eflags & MAP_ENTRY_NOSYNC) != 0) { if (m->dirty == 0) { m->oflags |= VPO_NOSYNC; } } else { m->oflags &= ~VPO_NOSYNC; } /* * If the fault is a write, we know that this page is being * written NOW so dirty it explicitly to save on * pmap_is_modified() calls later. * * Also tell the backing pager, if any, that it should remove * any swap backing since the page is now dirty. */ if (need_dirty) vm_page_dirty(m); if (!set_wd) vm_page_unlock(m); if (need_dirty) vm_pager_page_unswapped(m); } /* * vm_fault: * * Handle a page fault occurring at the given address, * requiring the given permissions, in the map specified. * If successful, the page is inserted into the * associated physical map. * * NOTE: the given address should be truncated to the * proper page address. * * KERN_SUCCESS is returned if the page fault is handled; otherwise, * a standard error specifying why the fault is fatal is returned. * * The map in question must be referenced, and remains so. * Caller may hold no locks. */ int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags) { struct thread *td; int result; td = curthread; if ((td->td_pflags & TDP_NOFAULTING) != 0) return (KERN_PROTECTION_FAILURE); #ifdef KTRACE if (map != kernel_map && KTRPOINT(td, KTR_FAULT)) ktrfault(vaddr, fault_type); #endif result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags, NULL); #ifdef KTRACE if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND)) ktrfaultend(result); #endif return (result); } int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) { vm_prot_t prot; int alloc_req, era, faultcount, nera, result; boolean_t growstack, is_first_object_locked, wired; int map_generation; vm_object_t next_object; int hardfault; struct faultstate fs; struct vnode *vp; vm_page_t m; - int ahead, behind, cluster_offset, error, locked; + int ahead, behind, cluster_offset, dead, error, locked; hardfault = 0; growstack = TRUE; PCPU_INC(cnt.v_vm_faults); fs.vp = NULL; faultcount = 0; RetryFault:; /* * Find the backing store object and offset into it to begin the * search. */ fs.map = map; result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry, &fs.first_object, &fs.first_pindex, &prot, &wired); if (result != KERN_SUCCESS) { if (growstack && result == KERN_INVALID_ADDRESS && map != kernel_map) { result = vm_map_growstack(curproc, vaddr); if (result != KERN_SUCCESS) return (KERN_FAILURE); growstack = FALSE; goto RetryFault; } return (result); } map_generation = fs.map->timestamp; if (fs.entry->eflags & MAP_ENTRY_NOFAULT) { panic("vm_fault: fault on nofault entry, addr: %lx", (u_long)vaddr); } if (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION && fs.entry->wiring_thread != curthread) { vm_map_unlock_read(fs.map); vm_map_lock(fs.map); if (vm_map_lookup_entry(fs.map, vaddr, &fs.entry) && (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION)) { if (fs.vp != NULL) { vput(fs.vp); fs.vp = NULL; } fs.entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; vm_map_unlock_and_wait(fs.map, 0); } else vm_map_unlock(fs.map); goto RetryFault; } if (wired) fault_type = prot | (fault_type & VM_PROT_COPY); else KASSERT((fault_flags & VM_FAULT_WIRE) == 0, ("!wired && VM_FAULT_WIRE")); if (fs.vp == NULL /* avoid locked vnode leak */ && (fault_flags & (VM_FAULT_WIRE | VM_FAULT_DIRTY)) == 0 && /* avoid calling vm_object_set_writeable_dirty() */ ((prot & VM_PROT_WRITE) == 0 || (fs.first_object->type != OBJT_VNODE && (fs.first_object->flags & OBJ_TMPFS_NODE) == 0) || (fs.first_object->flags & OBJ_MIGHTBEDIRTY) != 0)) { VM_OBJECT_RLOCK(fs.first_object); if ((prot & VM_PROT_WRITE) != 0 && (fs.first_object->type == OBJT_VNODE || (fs.first_object->flags & OBJ_TMPFS_NODE) != 0) && (fs.first_object->flags & OBJ_MIGHTBEDIRTY) == 0) goto fast_failed; m = vm_page_lookup(fs.first_object, fs.first_pindex); /* A busy page can be mapped for read|execute access. */ if (m == NULL || ((prot & VM_PROT_WRITE) != 0 && vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL) goto fast_failed; result = pmap_enter(fs.map->pmap, vaddr, m, prot, fault_type | PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), 0); if (result != KERN_SUCCESS) goto fast_failed; if (m_hold != NULL) { *m_hold = m; vm_page_lock(m); vm_page_hold(m); vm_page_unlock(m); } vm_fault_dirty(fs.entry, m, prot, fault_type, fault_flags, FALSE); VM_OBJECT_RUNLOCK(fs.first_object); if (!wired) vm_fault_prefault(&fs, vaddr, PFBAK, PFFOR); vm_map_lookup_done(fs.map, fs.entry); curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); fast_failed: if (!VM_OBJECT_TRYUPGRADE(fs.first_object)) { VM_OBJECT_RUNLOCK(fs.first_object); VM_OBJECT_WLOCK(fs.first_object); } } else { VM_OBJECT_WLOCK(fs.first_object); } /* * Make a reference to this object to prevent its disposal while we * are messing with it. Once we have the reference, the map is free * to be diddled. Since objects reference their shadows (and copies), * they will stay around as well. * * Bump the paging-in-progress count to prevent size changes (e.g. * truncation operations) during I/O. This must be done after * obtaining the vnode lock in order to avoid possible deadlocks. */ vm_object_reference_locked(fs.first_object); vm_object_pip_add(fs.first_object, 1); fs.lookup_still_valid = TRUE; fs.first_m = NULL; /* * Search for the page at object/offset. */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; while (TRUE) { /* - * If the object is dead, we stop here + * If the object is marked for imminent termination, + * we retry here, since the collapse pass has raced + * with us. Otherwise, if we see terminally dead + * object, return fail. */ - if (fs.object->flags & OBJ_DEAD) { + if ((fs.object->flags & OBJ_DEAD) != 0) { + dead = fs.object->type == OBJT_DEAD; unlock_and_deallocate(&fs); - return (KERN_PROTECTION_FAILURE); + if (dead) + return (KERN_PROTECTION_FAILURE); + pause("vmf_de", 1); + goto RetryFault; } /* * See if page is resident */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (fs.m != NULL) { /* * Wait/Retry if the page is busy. We have to do this * if the page is either exclusive or shared busy * because the vm_pager may be using read busy for * pageouts (and even pageins if it is the vnode * pager), and we could end up trying to pagein and * pageout the same page simultaneously. * * We can theoretically allow the busy case on a read * fault if the page is marked valid, but since such * pages are typically already pmap'd, putting that * special case in might be more effort then it is * worth. We cannot under any circumstances mess * around with a shared busied page except, perhaps, * to pmap it. */ if (vm_page_busied(fs.m)) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(fs.m, PGA_REFERENCED); if (fs.object != fs.first_object) { if (!VM_OBJECT_TRYWLOCK( fs.first_object)) { VM_OBJECT_WUNLOCK(fs.object); VM_OBJECT_WLOCK(fs.first_object); VM_OBJECT_WLOCK(fs.object); } vm_page_lock(fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); vm_object_pip_wakeup(fs.first_object); VM_OBJECT_WUNLOCK(fs.first_object); fs.first_m = NULL; } unlock_map(&fs); if (fs.m == vm_page_lookup(fs.object, fs.pindex)) { vm_page_sleep_if_busy(fs.m, "vmpfw"); } vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); PCPU_INC(cnt.v_intrans); vm_object_deallocate(fs.first_object); goto RetryFault; } vm_page_lock(fs.m); vm_page_remque(fs.m); vm_page_unlock(fs.m); /* * Mark page busy for other processes, and the * pagedaemon. If it still isn't completely valid * (readable), jump to readrest, else break-out ( we * found the page ). */ vm_page_xbusy(fs.m); if (fs.m->valid != VM_PAGE_BITS_ALL) goto readrest; break; } KASSERT(fs.m == NULL, ("fs.m should be NULL, not %p", fs.m)); /* * Page is not resident. If the pager might contain the page * or this is the beginning of the search, allocate a new * page. (Default objects are zero-fill, so there is no real * pager for them.) */ if (fs.object->type != OBJT_DEFAULT || fs.object == fs.first_object) { if (fs.pindex >= fs.object->size) { unlock_and_deallocate(&fs); return (KERN_PROTECTION_FAILURE); } /* * Allocate a new page for this object/offset pair. * * Unlocked read of the p_flag is harmless. At * worst, the P_KILLED might be not observed * there, and allocation can fail, causing * restart and new reading of the p_flag. */ if (!vm_page_count_severe() || P_KILLED(curproc)) { #if VM_NRESERVLEVEL > 0 vm_object_color(fs.object, atop(vaddr) - fs.pindex); #endif alloc_req = P_KILLED(curproc) ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL; if (fs.object->type != OBJT_VNODE && fs.object->backing_object == NULL) alloc_req |= VM_ALLOC_ZERO; fs.m = vm_page_alloc(fs.object, fs.pindex, alloc_req); } if (fs.m == NULL) { unlock_and_deallocate(&fs); VM_WAITPFAULT; goto RetryFault; } else if (fs.m->valid == VM_PAGE_BITS_ALL) break; } readrest: /* * We have either allocated a new page or found an existing * page that is only partially valid. * * Attempt to fault-in the page if there is a chance that the * pager has it, and potentially fault in additional pages * at the same time. */ if (fs.object->type != OBJT_DEFAULT) { int rv; u_char behavior = vm_map_entry_behavior(fs.entry); era = fs.entry->read_ahead; if (behavior == MAP_ENTRY_BEHAV_RANDOM || P_KILLED(curproc)) { behind = 0; nera = 0; ahead = 0; } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) { behind = 0; nera = VM_FAULT_READ_AHEAD_MAX; ahead = nera; if (fs.pindex == fs.entry->next_read) vm_fault_dontneed(&fs, vaddr, ahead); } else if (fs.pindex == fs.entry->next_read) { /* * This is a sequential fault. Arithmetically * increase the requested number of pages in * the read-ahead window. The requested * number of pages is "# of sequential faults * x (read ahead min + 1) + read ahead min" */ behind = 0; nera = VM_FAULT_READ_AHEAD_MIN; if (era > 0) { nera += era + 1; if (nera > VM_FAULT_READ_AHEAD_MAX) nera = VM_FAULT_READ_AHEAD_MAX; } ahead = nera; if (era == VM_FAULT_READ_AHEAD_MAX) vm_fault_dontneed(&fs, vaddr, ahead); } else { /* * This is a non-sequential fault. Request a * cluster of pages that is aligned to a * VM_FAULT_READ_DEFAULT page offset boundary * within the object. Alignment to a page * offset boundary is more likely to coincide * with the underlying file system block than * alignment to a virtual address boundary. */ cluster_offset = fs.pindex % VM_FAULT_READ_DEFAULT; behind = ulmin(cluster_offset, atop(vaddr - fs.entry->start)); nera = 0; ahead = VM_FAULT_READ_DEFAULT - 1 - cluster_offset; } ahead = ulmin(ahead, atop(fs.entry->end - vaddr) - 1); if (era != nera) fs.entry->read_ahead = nera; /* * Call the pager to retrieve the data, if any, after * releasing the lock on the map. We hold a ref on * fs.object and the pages are exclusive busied. */ unlock_map(&fs); if (fs.object->type == OBJT_VNODE) { vp = fs.object->handle; if (vp == fs.vp) goto vnode_locked; else if (fs.vp != NULL) { vput(fs.vp); fs.vp = NULL; } locked = VOP_ISLOCKED(vp); if (locked != LK_EXCLUSIVE) locked = LK_SHARED; /* Do not sleep for vnode lock while fs.m is busy */ error = vget(vp, locked | LK_CANRECURSE | LK_NOWAIT, curthread); if (error != 0) { vhold(vp); release_page(&fs); unlock_and_deallocate(&fs); error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread); vdrop(vp); fs.vp = vp; KASSERT(error == 0, ("vm_fault: vget failed")); goto RetryFault; } fs.vp = vp; } vnode_locked: KASSERT(fs.vp == NULL || !fs.map->system_map, ("vm_fault: vnode-backed object mapped by system map")); /* * Page in the requested page and hint the pager, * that it may bring up surrounding pages. */ rv = vm_pager_get_pages(fs.object, &fs.m, 1, &behind, &ahead); if (rv == VM_PAGER_OK) { faultcount = behind + 1 + ahead; hardfault++; break; /* break to PAGE HAS BEEN FOUND */ } if (rv == VM_PAGER_ERROR) printf("vm_fault: pager read error, pid %d (%s)\n", curproc->p_pid, curproc->p_comm); /* * If an I/O error occurred or the requested page was * outside the range of the pager, clean up and return * an error. */ if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { vm_page_lock(fs.m); vm_page_free(fs.m); vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return (rv == VM_PAGER_ERROR ? KERN_FAILURE : KERN_PROTECTION_FAILURE); } /* * The requested page does not exist at this object/ * offset. Remove the invalid page from the object, * waking up anyone waiting for it, and continue on to * the next object. However, if this is the top-level * object, we must leave the busy page in place to * prevent another process from rushing past us, and * inserting the page in that object at the same time * that we are. */ if (fs.object != fs.first_object) { vm_page_lock(fs.m); vm_page_free(fs.m); vm_page_unlock(fs.m); fs.m = NULL; } } /* * We get here if the object has default pager (or unwiring) * or the pager doesn't have the page. */ if (fs.object == fs.first_object) fs.first_m = fs.m; /* * Move on to the next object. Lock the next object before * unlocking the current one. */ next_object = fs.object->backing_object; if (next_object == NULL) { /* * If there's no object left, fill the page in the top * object with zeros. */ if (fs.object != fs.first_object) { vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; VM_OBJECT_WLOCK(fs.object); } fs.first_m = NULL; /* * Zero the page if necessary and mark it valid. */ if ((fs.m->flags & PG_ZERO) == 0) { pmap_zero_page(fs.m); } else { PCPU_INC(cnt.v_ozfod); } PCPU_INC(cnt.v_zfod); fs.m->valid = VM_PAGE_BITS_ALL; /* Don't try to prefault neighboring pages. */ faultcount = 1; break; /* break to PAGE HAS BEEN FOUND */ } else { KASSERT(fs.object != next_object, ("object loop %p", next_object)); VM_OBJECT_WLOCK(next_object); vm_object_pip_add(next_object, 1); if (fs.object != fs.first_object) vm_object_pip_wakeup(fs.object); fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset); VM_OBJECT_WUNLOCK(fs.object); fs.object = next_object; } } vm_page_assert_xbusied(fs.m); /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock * is held.] */ /* * If the page is being written, but isn't already owned by the * top-level object, we have to copy it into a new page owned by the * top-level object. */ if (fs.object != fs.first_object) { /* * We only really need to copy if we want to write it. */ if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { /* * This allows pages to be virtually copied from a * backing_object into the first_object, where the * backing object has no other refs to it, and cannot * gain any more refs. Instead of a bcopy, we just * move the page from the backing object to the * first object. Note that we must mark the page * dirty in the first object so that it will go out * to swap when needed. */ is_first_object_locked = FALSE; if ( /* * Only one shadow object */ (fs.object->shadow_count == 1) && /* * No COW refs, except us */ (fs.object->ref_count == 1) && /* * No one else can look this object up */ (fs.object->handle == NULL) && /* * No other ways to look the object up */ ((fs.object->type == OBJT_DEFAULT) || (fs.object->type == OBJT_SWAP)) && (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) && /* * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { vm_page_lock(fs.m); vm_page_remove(fs.m); vm_page_unlock(fs.m); vm_page_lock(fs.first_m); vm_page_replace_checked(fs.m, fs.first_object, fs.first_pindex, fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); vm_page_dirty(fs.m); #if VM_NRESERVLEVEL > 0 /* * Rename the reservation. */ vm_reserv_rename(fs.m, fs.first_object, fs.object, OFF_TO_IDX( fs.first_object->backing_object_offset)); #endif /* * Removing the page from the backing object * unbusied it. */ vm_page_xbusy(fs.m); fs.first_m = fs.m; fs.m = NULL; PCPU_INC(cnt.v_cow_optim); } else { /* * Oh, well, lets copy it. */ pmap_copy_page(fs.m, fs.first_m); fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_WIRE) == 0) { vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); vm_page_unlock(fs.first_m); vm_page_lock(fs.m); vm_page_unwire(fs.m, PQ_INACTIVE); vm_page_unlock(fs.m); } /* * We no longer need the old page or object. */ release_page(&fs); } /* * fs.object != fs.first_object due to above * conditional */ vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); /* * Only use the new page below... */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; if (!is_first_object_locked) VM_OBJECT_WLOCK(fs.object); PCPU_INC(cnt.v_cow_faults); curthread->td_cow++; } else { prot &= ~VM_PROT_WRITE; } } /* * We must verify that the maps have not changed since our last * lookup. */ if (!fs.lookup_still_valid) { vm_object_t retry_object; vm_pindex_t retry_pindex; vm_prot_t retry_prot; if (!vm_map_trylock_read(fs.map)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } fs.lookup_still_valid = TRUE; if (fs.map->timestamp != map_generation) { result = vm_map_lookup_locked(&fs.map, vaddr, fault_type, &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired); /* * If we don't need the page any longer, put it on the inactive * list (the easiest thing to do here). If no one needs it, * pageout will grab it eventually. */ if (result != KERN_SUCCESS) { release_page(&fs); unlock_and_deallocate(&fs); /* * If retry of map lookup would have blocked then * retry fault from start. */ if (result == KERN_FAILURE) goto RetryFault; return (result); } if ((retry_object != fs.first_object) || (retry_pindex != fs.first_pindex)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } /* * Check whether the protection has changed or the object has * been copied while we left the map unlocked. Changing from * read to write permission is OK - we leave the page * write-protected, and catch the write fault. Changing from * write to read permission means that we can't mark the page * write-enabled after all. */ prot &= retry_prot; } } /* * If the page was filled by a pager, update the map entry's * last read offset. * * XXX The following assignment modifies the map * without holding a write lock on it. */ if (hardfault) fs.entry->next_read = fs.pindex + ahead + 1; vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, TRUE); vm_page_assert_xbusied(fs.m); /* * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. */ KASSERT(fs.m->valid == VM_PAGE_BITS_ALL, ("vm_fault: page %p partially invalid", fs.m)); VM_OBJECT_WUNLOCK(fs.object); /* * Put this page into the physical map. We had to do the unlock above * because pmap_enter() may sleep. We don't put the page * back on the active queue until later so that the pageout daemon * won't find it (yet). */ pmap_enter(fs.map->pmap, vaddr, fs.m, prot, fault_type | (wired ? PMAP_ENTER_WIRED : 0), 0); if (faultcount != 1 && (fault_flags & VM_FAULT_WIRE) == 0 && wired == 0) vm_fault_prefault(&fs, vaddr, faultcount > 0 ? behind : PFBAK, faultcount > 0 ? ahead : PFFOR); VM_OBJECT_WLOCK(fs.object); vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. */ if ((fault_flags & VM_FAULT_WIRE) != 0) { KASSERT(wired, ("VM_FAULT_WIRE && !wired")); vm_page_wire(fs.m); } else vm_page_activate(fs.m); if (m_hold != NULL) { *m_hold = fs.m; vm_page_hold(fs.m); } vm_page_unlock(fs.m); vm_page_xunbusy(fs.m); /* * Unlock everything, and return */ unlock_and_deallocate(&fs); if (hardfault) { PCPU_INC(cnt.v_io_faults); curthread->td_ru.ru_majflt++; #ifdef RACCT if (racct_enable && fs.object->type == OBJT_VNODE) { PROC_LOCK(curproc); if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { racct_add_force(curproc, RACCT_WRITEBPS, PAGE_SIZE + behind * PAGE_SIZE); racct_add_force(curproc, RACCT_WRITEIOPS, 1); } else { racct_add_force(curproc, RACCT_READBPS, PAGE_SIZE + ahead * PAGE_SIZE); racct_add_force(curproc, RACCT_READIOPS, 1); } PROC_UNLOCK(curproc); } #endif } else curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); } /* * Speed up the reclamation of pages that precede the faulting pindex within * the first object of the shadow chain. Essentially, perform the equivalent * to madvise(..., MADV_DONTNEED) on a large cluster of pages that precedes * the faulting pindex by the cluster size when the pages read by vm_fault() * cross a cluster-size boundary. The cluster size is the greater of the * smallest superpage size and VM_FAULT_DONTNEED_MIN. * * When "fs->first_object" is a shadow object, the pages in the backing object * that precede the faulting pindex are deactivated by vm_fault(). So, this * function must only be concerned with pages in the first object. */ static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead) { vm_map_entry_t entry; vm_object_t first_object, object; vm_offset_t end, start; vm_page_t m, m_next; vm_pindex_t pend, pstart; vm_size_t size; object = fs->object; VM_OBJECT_ASSERT_WLOCKED(object); first_object = fs->first_object; if (first_object != object) { if (!VM_OBJECT_TRYWLOCK(first_object)) { VM_OBJECT_WUNLOCK(object); VM_OBJECT_WLOCK(first_object); VM_OBJECT_WLOCK(object); } } /* Neither fictitious nor unmanaged pages can be reclaimed. */ if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) { size = VM_FAULT_DONTNEED_MIN; if (MAXPAGESIZES > 1 && size < pagesizes[1]) size = pagesizes[1]; end = rounddown2(vaddr, size); if (vaddr - end >= size - PAGE_SIZE - ptoa(ahead) && (entry = fs->entry)->start < end) { if (end - entry->start < size) start = entry->start; else start = end - size; pmap_advise(fs->map->pmap, start, end, MADV_DONTNEED); pstart = OFF_TO_IDX(entry->offset) + atop(start - entry->start); m_next = vm_page_find_least(first_object, pstart); pend = OFF_TO_IDX(entry->offset) + atop(end - entry->start); while ((m = m_next) != NULL && m->pindex < pend) { m_next = TAILQ_NEXT(m, listq); if (m->valid != VM_PAGE_BITS_ALL || vm_page_busied(m)) continue; /* * Don't clear PGA_REFERENCED, since it would * likely represent a reference by a different * process. * * Typically, at this point, prefetched pages * are still in the inactive queue. Only * pages that triggered page faults are in the * active queue. */ vm_page_lock(m); vm_page_deactivate(m); vm_page_unlock(m); } } } if (first_object != object) VM_OBJECT_WUNLOCK(first_object); } /* * vm_fault_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of vm_map_pmap_enter, except it runs at page fault time instead * of mmap time. */ static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, int backward, int forward) { pmap_t pmap; vm_map_entry_t entry; vm_object_t backing_object, lobject; vm_offset_t addr, starta; vm_pindex_t pindex; vm_page_t m; int i; pmap = fs->map->pmap; if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) return; entry = fs->entry; starta = addra - backward * PAGE_SIZE; if (starta < entry->start) { starta = entry->start; } else if (starta > addra) { starta = 0; } /* * Generate the sequence of virtual addresses that are candidates for * prefaulting in an outward spiral from the faulting virtual address, * "addra". Specifically, the sequence is "addra - PAGE_SIZE", "addra * + PAGE_SIZE", "addra - 2 * PAGE_SIZE", "addra + 2 * PAGE_SIZE", ... * If the candidate address doesn't have a backing physical page, then * the loop immediately terminates. */ for (i = 0; i < 2 * imax(backward, forward); i++) { addr = addra + ((i >> 1) + 1) * ((i & 1) == 0 ? -PAGE_SIZE : PAGE_SIZE); if (addr > addra + forward * PAGE_SIZE) addr = 0; if (addr < starta || addr >= entry->end) continue; if (!pmap_is_prefaultable(pmap, addr)) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = entry->object.vm_object; VM_OBJECT_RLOCK(lobject); while ((m = vm_page_lookup(lobject, pindex)) == NULL && lobject->type == OBJT_DEFAULT && (backing_object = lobject->backing_object) != NULL) { KASSERT((lobject->backing_object_offset & PAGE_MASK) == 0, ("vm_fault_prefault: unaligned object offset")); pindex += lobject->backing_object_offset >> PAGE_SHIFT; VM_OBJECT_RLOCK(backing_object); VM_OBJECT_RUNLOCK(lobject); lobject = backing_object; } if (m == NULL) { VM_OBJECT_RUNLOCK(lobject); break; } if (m->valid == VM_PAGE_BITS_ALL && (m->flags & PG_FICTITIOUS) == 0) pmap_enter_quick(pmap, addr, m, entry->protection); VM_OBJECT_RUNLOCK(lobject); } } /* * Hold each of the physical pages that are mapped by the specified range of * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid * and allow the specified types of access, "prot". If all of the implied * pages are successfully held, then the number of held pages is returned * together with pointers to those pages in the array "ma". However, if any * of the pages cannot be held, -1 is returned. */ int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count) { vm_offset_t end, va; vm_page_t *mp; int count; boolean_t pmap_failed; if (len == 0) return (0); end = round_page(addr + len); addr = trunc_page(addr); /* * Check for illegal addresses. */ if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map)) return (-1); if (atop(end - addr) > max_count) panic("vm_fault_quick_hold_pages: count > max_count"); count = atop(end - addr); /* * Most likely, the physical pages are resident in the pmap, so it is * faster to try pmap_extract_and_hold() first. */ pmap_failed = FALSE; for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) { *mp = pmap_extract_and_hold(map->pmap, va, prot); if (*mp == NULL) pmap_failed = TRUE; else if ((prot & VM_PROT_WRITE) != 0 && (*mp)->dirty != VM_PAGE_BITS_ALL) { /* * Explicitly dirty the physical page. Otherwise, the * caller's changes may go unnoticed because they are * performed through an unmanaged mapping or by a DMA * operation. * * The object lock is not held here. * See vm_page_clear_dirty_mask(). */ vm_page_dirty(*mp); } } if (pmap_failed) { /* * One or more pages could not be held by the pmap. Either no * page was mapped at the specified virtual address or that * mapping had insufficient permissions. Attempt to fault in * and hold these pages. */ for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) if (*mp == NULL && vm_fault_hold(map, va, prot, VM_FAULT_NORMAL, mp) != KERN_SUCCESS) goto error; } return (count); error: for (mp = ma; mp < ma + count; mp++) if (*mp != NULL) { vm_page_lock(*mp); vm_page_unhold(*mp); vm_page_unlock(*mp); } return (-1); } /* * Routine: * vm_fault_copy_entry * Function: * Create new shadow object backing dst_entry with private copy of * all underlying pages. When src_entry is equal to dst_entry, * function implements COW for wired-down map entry. Otherwise, * it forks wired entry into dst_map. * * In/out conditions: * The source and destination maps must be locked for write. * The source map entry must be wired down (or be a sharing map * entry corresponding to a main map entry that is wired down). */ void vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, vm_map_entry_t dst_entry, vm_map_entry_t src_entry, vm_ooffset_t *fork_charge) { vm_object_t backing_object, dst_object, object, src_object; vm_pindex_t dst_pindex, pindex, src_pindex; vm_prot_t access, prot; vm_offset_t vaddr; vm_page_t dst_m; vm_page_t src_m; boolean_t upgrade; #ifdef lint src_map++; #endif /* lint */ upgrade = src_entry == dst_entry; access = prot = dst_entry->protection; src_object = src_entry->object.vm_object; src_pindex = OFF_TO_IDX(src_entry->offset); if (upgrade && (dst_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) { dst_object = src_object; vm_object_reference(dst_object); } else { /* * Create the top-level object for the destination entry. (Doesn't * actually shadow anything - we copy the pages directly.) */ dst_object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(dst_entry->end - dst_entry->start)); #if VM_NRESERVLEVEL > 0 dst_object->flags |= OBJ_COLORED; dst_object->pg_color = atop(dst_entry->start); #endif } VM_OBJECT_WLOCK(dst_object); KASSERT(upgrade || dst_entry->object.vm_object == NULL, ("vm_fault_copy_entry: vm_object not NULL")); if (src_object != dst_object) { dst_entry->object.vm_object = dst_object; dst_entry->offset = 0; dst_object->charge = dst_entry->end - dst_entry->start; } if (fork_charge != NULL) { KASSERT(dst_entry->cred == NULL, ("vm_fault_copy_entry: leaked swp charge")); dst_object->cred = curthread->td_ucred; crhold(dst_object->cred); *fork_charge += dst_object->charge; } else if (dst_object->cred == NULL) { KASSERT(dst_entry->cred != NULL, ("no cred for entry %p", dst_entry)); dst_object->cred = dst_entry->cred; dst_entry->cred = NULL; } /* * If not an upgrade, then enter the mappings in the pmap as * read and/or execute accesses. Otherwise, enter them as * write accesses. * * A writeable large page mapping is only created if all of * the constituent small page mappings are modified. Marking * PTEs as modified on inception allows promotion to happen * without taking potentially large number of soft faults. */ if (!upgrade) access &= ~VM_PROT_WRITE; /* * Loop through all of the virtual pages within the entry's * range, copying each page from the source object to the * destination object. Since the source is wired, those pages * must exist. In contrast, the destination is pageable. * Since the destination object does share any backing storage * with the source object, all of its pages must be dirtied, * regardless of whether they can be written. */ for (vaddr = dst_entry->start, dst_pindex = 0; vaddr < dst_entry->end; vaddr += PAGE_SIZE, dst_pindex++) { again: /* * Find the page in the source object, and copy it in. * Because the source is wired down, the page will be * in memory. */ if (src_object != dst_object) VM_OBJECT_RLOCK(src_object); object = src_object; pindex = src_pindex + dst_pindex; while ((src_m = vm_page_lookup(object, pindex)) == NULL && (backing_object = object->backing_object) != NULL) { /* * Unless the source mapping is read-only or * it is presently being upgraded from * read-only, the first object in the shadow * chain should provide all of the pages. In * other words, this loop body should never be * executed when the source mapping is already * read/write. */ KASSERT((src_entry->protection & VM_PROT_WRITE) == 0 || upgrade, ("vm_fault_copy_entry: main object missing page")); VM_OBJECT_RLOCK(backing_object); pindex += OFF_TO_IDX(object->backing_object_offset); if (object != dst_object) VM_OBJECT_RUNLOCK(object); object = backing_object; } KASSERT(src_m != NULL, ("vm_fault_copy_entry: page missing")); if (object != dst_object) { /* * Allocate a page in the destination object. */ dst_m = vm_page_alloc(dst_object, (src_object == dst_object ? src_pindex : 0) + dst_pindex, VM_ALLOC_NORMAL); if (dst_m == NULL) { VM_OBJECT_WUNLOCK(dst_object); VM_OBJECT_RUNLOCK(object); VM_WAIT; VM_OBJECT_WLOCK(dst_object); goto again; } pmap_copy_page(src_m, dst_m); VM_OBJECT_RUNLOCK(object); dst_m->valid = VM_PAGE_BITS_ALL; dst_m->dirty = VM_PAGE_BITS_ALL; } else { dst_m = src_m; if (vm_page_sleep_if_busy(dst_m, "fltupg")) goto again; vm_page_xbusy(dst_m); KASSERT(dst_m->valid == VM_PAGE_BITS_ALL, ("invalid dst page %p", dst_m)); } VM_OBJECT_WUNLOCK(dst_object); /* * Enter it in the pmap. If a wired, copy-on-write * mapping is being replaced by a write-enabled * mapping, then wire that new mapping. */ pmap_enter(dst_map->pmap, vaddr, dst_m, prot, access | (upgrade ? PMAP_ENTER_WIRED : 0), 0); /* * Mark it no longer busy, and put it on the active list. */ VM_OBJECT_WLOCK(dst_object); if (upgrade) { if (src_m != dst_m) { vm_page_lock(src_m); vm_page_unwire(src_m, PQ_INACTIVE); vm_page_unlock(src_m); vm_page_lock(dst_m); vm_page_wire(dst_m); vm_page_unlock(dst_m); } else { KASSERT(dst_m->wire_count > 0, ("dst_m %p is not wired", dst_m)); } } else { vm_page_lock(dst_m); vm_page_activate(dst_m); vm_page_unlock(dst_m); } vm_page_xunbusy(dst_m); } VM_OBJECT_WUNLOCK(dst_object); if (upgrade) { dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY); vm_object_deallocate(src_object); } } /* * Block entry into the machine-independent layer's page fault handler by * the calling thread. Subsequent calls to vm_fault() by that thread will * return KERN_PROTECTION_FAILURE. Enable machine-dependent handling of * spurious page faults. */ int vm_fault_disable_pagefaults(void) { return (curthread_pflags_set(TDP_NOFAULTING | TDP_RESETSPUR)); } void vm_fault_enable_pagefaults(int save) { curthread_pflags_restore(save); } Index: projects/vnet/tools/build/options/WITH_EXTRA_TCP_STACKS =================================================================== --- projects/vnet/tools/build/options/WITH_EXTRA_TCP_STACKS (nonexistent) +++ projects/vnet/tools/build/options/WITH_EXTRA_TCP_STACKS (revision 302277) @@ -0,0 +1,2 @@ +.\" $FreeBSD$ +Set to build extra TCP stack modules. Property changes on: projects/vnet/tools/build/options/WITH_EXTRA_TCP_STACKS ___________________________________________________________________ Added: svn:eol-style ## -0,0 +1 ## +native \ No newline at end of property Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property Added: svn:mime-type ## -0,0 +1 ## +text/plain \ No newline at end of property Index: projects/vnet =================================================================== --- projects/vnet (revision 302276) +++ projects/vnet (revision 302277) Property changes on: projects/vnet ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r302224-302276