Index: projects/clang500-import/Makefile.inc1
===================================================================
--- projects/clang500-import/Makefile.inc1 (revision 319250)
+++ projects/clang500-import/Makefile.inc1 (revision 319251)
@@ -1,2724 +1,2740 @@
#
# $FreeBSD$
#
# Make command line options:
# -DNO_CLEANDIR run ${MAKE} clean, instead of ${MAKE} cleandir
# -DNO_CLEAN do not clean at all
# -DDB_FROM_SRC use the user/group databases in src/etc instead of
#     the system database when installing.
# -DNO_SHARE do not go into share subdir
# -DKERNFAST define NO_KERNEL{CONFIG,CLEAN,OBJ}
# -DNO_KERNELCONFIG do not run config in ${MAKE} buildkernel
# -DNO_KERNELCLEAN do not run ${MAKE} clean in ${MAKE} buildkernel
# -DNO_KERNELOBJ do not run ${MAKE} obj in ${MAKE} buildkernel
# -DNO_PORTSUPDATE do not update ports in ${MAKE} update
# -DNO_ROOT install without using root privilege
# -DNO_DOCUPDATE do not update doc in ${MAKE} update
# -DWITHOUT_CTF do not run the DTrace CTF conversion tools on built objects
# LOCAL_DIRS="list of dirs" to add additional dirs to the SUBDIR list
# LOCAL_ITOOLS="list of tools" to add additional tools to the ITOOLS list
# LOCAL_LIB_DIRS="list of dirs" to add additional dirs to libraries target
# LOCAL_MTREE="list of mtree files" to process to allow local directories
#     to be created before files are installed
# LOCAL_TOOL_DIRS="list of dirs" to add additional dirs to the build-tools
#     list
# LOCAL_XTOOL_DIRS="list of dirs" to add additional dirs to the
#     cross-tools target
# METALOG="path to metadata log" to write permission and ownership
#     when NO_ROOT is set.  (default: ${DESTDIR}/METALOG)
# TARGET="machine" to crossbuild world for a different machine type
# TARGET_ARCH= may be required when a TARGET supports multiple endians
# BUILDENV_SHELL= shell to launch for the buildenv target (def:${SHELL})
# WORLD_FLAGS= additional flags to pass to make(1) during buildworld
# KERNEL_FLAGS= additional flags to pass to make(1) during buildkernel
# SUBDIR_OVERRIDE="list of dirs" to build rather than everything.
#     All libraries and includes, and some build tools will still build.
#
# The intended user-driven targets are:
# buildworld   - rebuild *everything*, including glue to help do upgrades
# installworld - install everything built by "buildworld"
# checkworld   - run test suite on installed world
# doxygen      - build API documentation of the kernel
# update       - convenient way to update your source tree (e.g. svn/svnup)
#
# Standard targets (not defined here) are documented in the makefiles in
# /usr/share/mk.  These include:
#	obj depend all install clean cleandepend cleanobj

.if !defined(TARGET) || !defined(TARGET_ARCH)
.error "Both TARGET and TARGET_ARCH must be defined."
.endif

SRCDIR?= ${.CURDIR}
LOCALBASE?= /usr/local

# Cross toolchain changes must be in effect before bsd.compiler.mk
# so that it gets the right CC, and pass CROSS_TOOLCHAIN to submakes.
.if defined(CROSS_TOOLCHAIN)
.include "${LOCALBASE}/share/toolchains/${CROSS_TOOLCHAIN}.mk"
CROSSENV+=CROSS_TOOLCHAIN="${CROSS_TOOLCHAIN}"
.endif
.if defined(CROSS_TOOLCHAIN_PREFIX)
CROSS_COMPILER_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
.endif

XCOMPILERS= CC CXX CPP
.for COMPILER in ${XCOMPILERS}
.if defined(CROSS_COMPILER_PREFIX)
X${COMPILER}?= ${CROSS_COMPILER_PREFIX}${${COMPILER}}
.else
X${COMPILER}?= ${${COMPILER}}
.endif
.endfor
# If a full path to an external cross compiler is given, don't build
# a cross compiler.
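# (Illustrative note, hypothetical values: a toolchain file such as
# ${LOCALBASE}/share/toolchains/aarch64-gcc.mk would typically set XCC to an
# absolute path, e.g. XCC=/usr/local/bin/aarch64-freebsd-gcc.  The :M/*
# pattern below matches exactly such absolute paths, after :N${CCACHE_BIN}
# strips a leading ccache wrapper word.)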
.if ${XCC:N${CCACHE_BIN}:M/*}
MK_CLANG_BOOTSTRAP= no
MK_GCC_BOOTSTRAP= no
.endif

MAKEOBJDIRPREFIX?= /usr/obj
.if ${MACHINE} == ${TARGET} && ${MACHINE_ARCH} == ${TARGET_ARCH} && !defined(CROSS_BUILD_TESTING)
OBJTREE= ${MAKEOBJDIRPREFIX}
.else
OBJTREE= ${MAKEOBJDIRPREFIX}/${TARGET}.${TARGET_ARCH}
.endif

# Pull in compiler metadata from buildworld/toolchain if possible to avoid
# running CC from bsd.compiler.mk.
.if make(installworld) || make(install)
.-include "${OBJTREE}${.CURDIR}/compiler-metadata.mk"
.endif

# Pull in COMPILER_TYPE and COMPILER_FREEBSD_VERSION early.
.include <bsd.compiler.mk>
.include "share/mk/src.opts.mk"

# Check whether a local compiler can satisfy the external compiler
# requirement.  Which compiler is expected to be used?
.if ${MK_CLANG_BOOTSTRAP} == "yes"
WANT_COMPILER_TYPE= clang
.elif ${MK_GCC_BOOTSTRAP} == "yes"
WANT_COMPILER_TYPE= gcc
.else
WANT_COMPILER_TYPE=
.endif

.if !defined(WANT_COMPILER_FREEBSD_VERSION)
.if ${WANT_COMPILER_TYPE} == "clang"
WANT_COMPILER_FREEBSD_VERSION_FILE= lib/clang/freebsd_cc_version.h
WANT_COMPILER_FREEBSD_VERSION!= \
	awk '$$2 == "FREEBSD_CC_VERSION" {printf("%d\n", $$3)}' \
	${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown
WANT_COMPILER_VERSION_FILE= lib/clang/include/clang/Basic/Version.inc
WANT_COMPILER_VERSION!= \
	awk '$$2 == "CLANG_VERSION" {split($$3, a, "."); print a[1] * 10000 + a[2] * 100 + a[3]}' \
	${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown
.elif ${WANT_COMPILER_TYPE} == "gcc"
WANT_COMPILER_FREEBSD_VERSION_FILE= gnu/usr.bin/cc/cc_tools/freebsd-native.h
WANT_COMPILER_FREEBSD_VERSION!= \
	awk '$$2 == "FBSD_CC_VER" {printf("%d\n", $$3)}' \
	${SRCDIR}/${WANT_COMPILER_FREEBSD_VERSION_FILE} || echo unknown
WANT_COMPILER_VERSION_FILE= contrib/gcc/BASE-VER
WANT_COMPILER_VERSION!= \
	awk -F. '{print $$1 * 10000 + $$2 * 100 + $$3}' \
	${SRCDIR}/${WANT_COMPILER_VERSION_FILE} || echo unknown
.endif
.export WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_VERSION
.endif	# !defined(WANT_COMPILER_FREEBSD_VERSION)

# The system compiler needs to be the same revision as the one we would build
# for the bootstrap.  If the expected compiler and CC differ then we can't
# skip the bootstrap.
# GCC cannot be used for cross-arch yet.  For clang we pass -target later if
# TARGET_ARCH!=MACHINE_ARCH.
.if ${MK_SYSTEM_COMPILER} == "yes" && \
    (${MK_CLANG_BOOTSTRAP} == "yes" || ${MK_GCC_BOOTSTRAP} == "yes") && \
    !make(showconfig) && !make(native-xtools) && !make(xdev*) && \
    ${WANT_COMPILER_TYPE} == ${COMPILER_TYPE} && \
    (${COMPILER_TYPE} == "clang" || ${TARGET_ARCH} == ${MACHINE_ARCH}) && \
    ${COMPILER_VERSION} == ${WANT_COMPILER_VERSION} && \
    ${COMPILER_FREEBSD_VERSION} == ${WANT_COMPILER_FREEBSD_VERSION}
# Everything matches, disable the bootstrap compiler.
MK_CLANG_BOOTSTRAP= no
MK_GCC_BOOTSTRAP= no
USING_SYSTEM_COMPILER= yes
.endif	# ${WANT_COMPILER_TYPE} == ${COMPILER_TYPE}
USING_SYSTEM_COMPILER?= no

TEST_SYSTEM_COMPILER_VARS= \
	USING_SYSTEM_COMPILER MK_SYSTEM_COMPILER \
	MK_CROSS_COMPILER MK_CLANG_BOOTSTRAP MK_GCC_BOOTSTRAP \
	WANT_COMPILER_TYPE WANT_COMPILER_VERSION WANT_COMPILER_VERSION_FILE \
	WANT_COMPILER_FREEBSD_VERSION WANT_COMPILER_FREEBSD_VERSION_FILE \
	CC COMPILER_TYPE COMPILER_FEATURES COMPILER_VERSION \
	COMPILER_FREEBSD_VERSION
test-system-compiler: .PHONY
.for v in ${TEST_SYSTEM_COMPILER_VARS}
	${_+_}@printf "%-35s= %s\n" "${v}" "${${v}}"
.endfor
.if ${USING_SYSTEM_COMPILER} == "yes" && \
    (make(buildworld) || make(buildkernel) || make(kernel-toolchain) || \
    make(toolchain) || make(_cross-tools))
.info SYSTEM_COMPILER: Determined that CC=${CC} matches the source tree. Not bootstrapping a cross-compiler.
.endif

# For installworld we need to ensure that the looked-up compiler metadata is
# passed along rather than trying to run cc from the restricted
# STRICTTMPPATH.
.if ${MK_CLANG_BOOTSTRAP} == "no" && ${MK_GCC_BOOTSTRAP} == "no"
.if !defined(X_COMPILER_TYPE)
CROSSENV+= COMPILER_VERSION=${COMPILER_VERSION} \
	COMPILER_TYPE=${COMPILER_TYPE} \
	COMPILER_FEATURES=${COMPILER_FEATURES} \
	COMPILER_FREEBSD_VERSION=${COMPILER_FREEBSD_VERSION}
.else
CROSSENV+= COMPILER_VERSION=${X_COMPILER_VERSION} \
	COMPILER_FEATURES=${X_COMPILER_FEATURES} \
	COMPILER_TYPE=${X_COMPILER_TYPE} \
	COMPILER_FREEBSD_VERSION=${X_COMPILER_FREEBSD_VERSION}
.endif
.endif

# Store some compiler metadata for use in installworld where we don't
# want to invoke CC at all.
_COMPILER_METADATA_VARS= COMPILER_VERSION \
	COMPILER_TYPE \
	COMPILER_FEATURES \
	COMPILER_FREEBSD_VERSION
compiler-metadata.mk: .PHONY .META
	@: > ${.TARGET}
	@echo ".info Using cached compiler metadata from build at $$(hostname) on $$(date)" \
	    > ${.TARGET}
.for v in ${_COMPILER_METADATA_VARS}
	@echo "${v}=${${v}}" >> ${.TARGET}
.endfor
	@echo ".export ${_COMPILER_METADATA_VARS}" >> ${.TARGET}

# Handle external binutils.
.if defined(CROSS_TOOLCHAIN_PREFIX)
CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
.endif
# If we do not have a bootstrap binutils (because the in-tree one does not
# support the target architecture), provide a default cross-binutils prefix.
# This allows riscv64 builds, for example, to automatically use the
# riscv64-binutils port or package.
.if !make(showconfig)
.if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \
    ${MK_LLD_BOOTSTRAP} == "no" && \
    !defined(CROSS_BINUTILS_PREFIX)
CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_ARCH}-freebsd/bin/
.if !exists(${CROSS_BINUTILS_PREFIX})
.error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX.
.endif
.endif
.endif
XBINUTILS= AS AR LD NM OBJCOPY RANLIB SIZE STRINGS
.for BINUTIL in ${XBINUTILS}
.if defined(CROSS_BINUTILS_PREFIX) && \
    exists(${CROSS_BINUTILS_PREFIX}${${BINUTIL}})
X${BINUTIL}?= ${CROSS_BINUTILS_PREFIX}${${BINUTIL}}
.else
X${BINUTIL}?= ${${BINUTIL}}
.endif
.endfor

# We must do lib/ and libexec/ before bin/ in case of a mid-install error to
# keep the user's system reasonably usable.  For static->dynamic root upgrades,
# we don't want to install a dynamic binary without rtld and the needed
# libraries.  More commonly, for dynamic root, we don't want to install a
# binary that requires a newer library version that hasn't been installed yet.
# This ordering is not a guarantee though.  The only guarantee of a working
# system here would require fine-grained ordering of all components based
# on their dependencies.
.if !empty(SUBDIR_OVERRIDE)
SUBDIR= ${SUBDIR_OVERRIDE}
.else
SUBDIR= lib libexec
.if !defined(NO_ROOT) && (make(installworld) || make(install))
# Ensure libraries are installed before progressing.
SUBDIR+=.WAIT
.endif
SUBDIR+=bin
.if ${MK_CDDL} != "no"
SUBDIR+=cddl
.endif
SUBDIR+=gnu include
.if ${MK_KERBEROS} != "no"
SUBDIR+=kerberos5
.endif
.if ${MK_RESCUE} != "no"
SUBDIR+=rescue
.endif
SUBDIR+=sbin
.if ${MK_CRYPT} != "no"
SUBDIR+=secure
.endif
.if !defined(NO_SHARE)
SUBDIR+=share
.endif
SUBDIR+=sys usr.bin usr.sbin
.if ${MK_TESTS} != "no"
SUBDIR+= tests
.endif
.if ${MK_OFED} != "no"
SUBDIR+=contrib/ofed
.endif

# Local directories are last, since it is nice to at least get the base
# system rebuilt before you do them.
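# (Example, hypothetical directory name: invoking
#	make buildworld LOCAL_DIRS="tools/mytool"
# appends tools/mytool to SUBDIR in the loop below, provided
# ${.CURDIR}/tools/mytool/Makefile exists.)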
.for _DIR in ${LOCAL_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .endif .endfor # Add LOCAL_LIB_DIRS, but only if they will not be picked up as a SUBDIR # of a LOCAL_DIRS directory. This allows LOCAL_DIRS=foo and # LOCAL_LIB_DIRS=foo/lib to behave as expected. .for _DIR in ${LOCAL_DIRS:M*/} ${LOCAL_DIRS:N*/:S|$|/|} _REDUNDANT_LIB_DIRS+= ${LOCAL_LIB_DIRS:M${_DIR}*} .endfor .for _DIR in ${LOCAL_LIB_DIRS} .if empty(_REDUNDANT_LIB_DIRS:M${_DIR}) && exists(${.CURDIR}/${_DIR}/Makefile) SUBDIR+= ${_DIR} .endif .endfor # We must do etc/ last as it hooks into building the man whatis file # by calling 'makedb' in share/man. This is only relevant for # install/distribute so they build the whatis file after every manpage is # installed. .if make(installworld) || make(install) SUBDIR+=.WAIT .endif SUBDIR+=etc .endif # !empty(SUBDIR_OVERRIDE) .if defined(NOCLEAN) .warning NOCLEAN option is deprecated. Use NO_CLEAN instead. NO_CLEAN= ${NOCLEAN} .endif .if defined(NO_CLEANDIR) CLEANDIR= clean cleandepend .else CLEANDIR= cleandir .endif .if defined(WORLDFAST) NO_CLEAN= t NO_OBJ= t .endif .if ${MK_META_MODE} == "yes" # If filemon is used then we can rely on the build being incremental-safe. # The .meta files will also track the build command and rebuild should # it change. .if empty(.MAKE.MODE:Mnofilemon) NO_CLEAN= t .endif .endif .if defined(NO_OBJ) || ${MK_AUTO_OBJ} == "yes" NO_OBJ= t NO_KERNELOBJ= t .endif .if !defined(NO_OBJ) _obj= obj .endif LOCAL_TOOL_DIRS?= PACKAGEDIR?= ${DESTDIR}/${DISTDIR} .if empty(SHELL:M*csh*) BUILDENV_SHELL?=${SHELL} .else BUILDENV_SHELL?=/bin/sh .endif .if !defined(SVN) || empty(SVN) . for _P in /usr/bin /usr/local/bin . for _S in svn svnlite . if exists(${_P}/${_S}) SVN= ${_P}/${_S} . endif . endfor . endfor .endif SVNFLAGS?= -r HEAD .if !defined(OSRELDATE) .if exists(/usr/include/osreldate.h) OSRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ /usr/include/osreldate.h .else OSRELDATE= 0 .endif .export OSRELDATE .endif # Set VERSION for CTFMERGE to use via the default CTFFLAGS=-L VERSION. .if !defined(_REVISION) _REVISION!= MK_AUTO_OBJ=no ${MAKE} -C ${SRCDIR}/release -V REVISION .export _REVISION .endif .if !defined(_BRANCH) _BRANCH!= MK_AUTO_OBJ=no ${MAKE} -C ${SRCDIR}/release -V BRANCH .export _BRANCH .endif .if !defined(SRCRELDATE) SRCRELDATE!= awk '/^\#define[[:space:]]*__FreeBSD_version/ { print $$3 }' \ ${SRCDIR}/sys/sys/param.h .export SRCRELDATE .endif .if !defined(VERSION) VERSION= FreeBSD ${_REVISION}-${_BRANCH:C/-p[0-9]+$//} ${TARGET_ARCH} ${SRCRELDATE} .export VERSION .endif .if !defined(PKG_VERSION) .if ${_BRANCH:MSTABLE*} || ${_BRANCH:MCURRENT*} || ${_BRANCH:MALPHA*} TIMENOW= %Y%m%d%H%M%S EXTRA_REVISION= .s${TIMENOW:gmtime} .endif .if ${_BRANCH:M*-p*} EXTRA_REVISION= _${_BRANCH:C/.*-p([0-9]+$)/\1/} .endif PKG_VERSION= ${_REVISION}${EXTRA_REVISION} .endif KNOWN_ARCHES?= aarch64/arm64 \ amd64 \ arm \ armeb/arm \ armv6/arm \ i386 \ mips \ mipsel/mips \ mips64el/mips \ mipsn32el/mips \ mips64/mips \ mipsn32/mips \ mipshf/mips \ mipselhf/mips \ mips64elhf/mips \ mips64hf/mips \ powerpc \ powerpc64/powerpc \ powerpcspe/powerpc \ riscv64/riscv \ riscv64sf/riscv \ sparc64 .if ${TARGET} == ${TARGET_ARCH} _t= ${TARGET} .else _t= ${TARGET_ARCH}/${TARGET} .endif .for _t in ${_t} .if empty(KNOWN_ARCHES:M${_t}) .error Unknown target ${TARGET_ARCH}:${TARGET}. 
.endif
.endfor

.if ${TARGET} == ${MACHINE}
TARGET_CPUTYPE?=${CPUTYPE}
.else
TARGET_CPUTYPE?=
.endif
.if !empty(TARGET_CPUTYPE)
_TARGET_CPUTYPE=${TARGET_CPUTYPE}
.else
_TARGET_CPUTYPE=dummy
.endif
_CPUTYPE!= MK_AUTO_OBJ=no MAKEFLAGS= CPUTYPE=${_TARGET_CPUTYPE} ${MAKE} \
	-f /dev/null -m ${.CURDIR}/share/mk -V CPUTYPE
.if ${_CPUTYPE} != ${_TARGET_CPUTYPE}
.error CPUTYPE global should be set with ?=.
.endif
.if make(buildworld)
BUILD_ARCH!= uname -p
.if ${MACHINE_ARCH} != ${BUILD_ARCH}
.error To cross-build, set TARGET_ARCH.
.endif
.endif
WORLDTMP= ${OBJTREE}${.CURDIR}/tmp
BPATH= ${WORLDTMP}/legacy/usr/sbin:${WORLDTMP}/legacy/usr/bin:${WORLDTMP}/legacy/bin
XPATH= ${WORLDTMP}/usr/sbin:${WORLDTMP}/usr/bin
STRICTTMPPATH= ${BPATH}:${XPATH}
TMPPATH= ${STRICTTMPPATH}:${PATH}

#
# Avoid running mktemp(1) unless actually needed.
# It may not be functional, e.g., due to new ABI
# when in the middle of installing over this system.
#
.if make(distributeworld) || make(installworld) || make(stageworld)
INSTALLTMP!= /usr/bin/mktemp -d -u -t install
.endif

.if make(stagekernel) || make(distributekernel)
TAGS+= kernel
PACKAGE= kernel
.endif

#
# Building a world goes through the following stages
#
# 1.1 legacy stage [BMAKE]
#	This stage is responsible for creating compatibility
#	shims that are needed by the bootstrap-tools,
#	build-tools and cross-tools stages. These are generally
#	APIs that tools from one of those three stages need to
#	build that aren't present on the host.
# 1.2 bootstrap-tools stage [BMAKE]
#	This stage is responsible for creating programs that
#	are needed for backward compatibility reasons. They
#	are not built as cross-tools.
# 2. build-tools stage [TMAKE]
#	This stage is responsible for creating the object
#	tree and building any tools that are needed during
#	the build process. Some programs are listed during
#	this phase because they build binaries to generate
#	files needed to build these programs. This stage also
#	builds the 'build-tools' target rather than 'all'.
# 3. cross-tools stage [XMAKE]
#	This stage is responsible for creating any tools that
#	are needed for building the system. A cross-compiler is one
#	of them. This differs from build tools in two ways:
#	1. the 'all' target is built rather than 'build-tools'
#	2. these tools are installed into TMPPATH for stage 4.
# 4. world stage [WMAKE]
#	This stage actually builds the world.
# 5. install stage (optional) [IMAKE]
#	This stage installs a previously built world.
#

BOOTSTRAPPING?= 0
# Keep these in sync -- see below for special case exception
MINIMUM_SUPPORTED_OSREL?= 900044
MINIMUM_SUPPORTED_REL?= 9.1

# Common environment for world-related stages
CROSSENV+= MAKEOBJDIRPREFIX=${OBJTREE} \
	MACHINE_ARCH=${TARGET_ARCH} \
	MACHINE=${TARGET} \
	CPUTYPE=${TARGET_CPUTYPE}
.if ${MK_META_MODE} != "no"
# Don't rebuild build-tools targets during normal build.
CROSSENV+= BUILD_TOOLS_META=.NOMETA
.endif
.if defined(TARGET_CFLAGS)
CROSSENV+= ${TARGET_CFLAGS}
.endif

# bootstrap-tools stage
BMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \
	TOOLS_PREFIX=${WORLDTMP} \
	PATH=${BPATH}:${PATH} \
	WORLDTMP=${WORLDTMP} \
	MAKEFLAGS="-m ${.CURDIR}/tools/build/mk ${.MAKEFLAGS}"
# need to keep this in sync with targets/pseudo/bootstrap-tools/Makefile
BSARGS= 	DESTDIR= \
	BOOTSTRAPPING=${OSRELDATE} \
	SSP_CFLAGS= \
	MK_HTML=no NO_LINT=yes MK_MAN=no \
	-DNO_PIC MK_PROFILE=no -DNO_SHARED \
	-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
	MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
	MK_LLDB=no MK_TESTS=no \
	MK_INCLUDES=yes

BMAKE= MAKEOBJDIRPREFIX=${WORLDTMP} \
	${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
	${BSARGS}

# build-tools stage
TMAKE= MAKEOBJDIRPREFIX=${OBJTREE} \
	${BMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
	TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
	DESTDIR= \
	BOOTSTRAPPING=${OSRELDATE} \
	SSP_CFLAGS= \
	-DNO_LINT \
	-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \
	MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \
	MK_LLDB=no MK_TESTS=no

# cross-tools stage
XMAKE= TOOLS_PREFIX=${WORLDTMP} ${BMAKE} \
	TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \
	MK_GDB=no MK_TESTS=no

# kernel-tools stage
KTMAKEENV= INSTALL="sh ${.CURDIR}/tools/install.sh" \
	PATH=${BPATH}:${PATH} \
	WORLDTMP=${WORLDTMP}
KTMAKE= TOOLS_PREFIX=${WORLDTMP} MAKEOBJDIRPREFIX=${WORLDTMP} \
	${KTMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 \
	DESTDIR= \
	BOOTSTRAPPING=${OSRELDATE} \
	SSP_CFLAGS= \
	MK_HTML=no -DNO_LINT MK_MAN=no \
	-DNO_PIC MK_PROFILE=no -DNO_SHARED \
	-DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no

# world stage
WMAKEENV= ${CROSSENV} \
	INSTALL="sh ${.CURDIR}/tools/install.sh" \
	PATH=${TMPPATH}

# make hierarchy
HMAKE= PATH=${TMPPATH} ${MAKE} LOCAL_MTREE=${LOCAL_MTREE:Q}
.if defined(NO_ROOT)
HMAKE+= PATH=${TMPPATH} METALOG=${METALOG} -DNO_ROOT
.endif

CROSSENV+= CC="${XCC} ${XCFLAGS}" CXX="${XCXX} ${XCXXFLAGS} ${XCFLAGS}" \
	CPP="${XCPP} ${XCFLAGS}" \
	AS="${XAS}" AR="${XAR}" LD="${XLD}" LLVM_LINK="${XLLVM_LINK}" \
	NM=${XNM} OBJCOPY="${XOBJCOPY}" \
	RANLIB=${XRANLIB} STRINGS=${XSTRINGS} \
	SIZE="${XSIZE}"

.if defined(CROSS_BINUTILS_PREFIX) && exists(${CROSS_BINUTILS_PREFIX})
# In the case of xdev-build tools, CROSS_BINUTILS_PREFIX won't be a
# directory, but the compiler will look in the right place for its
# tools so we don't need to tell it where to look.
BFLAGS+= -B${CROSS_BINUTILS_PREFIX}
.endif

# The internal bootstrap compiler has a default sysroot set by TOOLS_PREFIX
# and target set by TARGET/TARGET_ARCH.  However, there are several reasons to
# always pass an explicit --sysroot and -target.
# - External compiler needs sysroot and target flags.
# - External ld needs sysroot.
# - To be clear about the use of a sysroot when using the internal compiler.
# - Easier debugging.
# - Allowing WITH_SYSTEM_COMPILER+WITH_META_MODE to work together due to
#   the flip-flopping build command when sometimes using external and
#   sometimes using internal.
# - Allow using lld which has no support for default paths.
.if !defined(CROSS_BINUTILS_PREFIX) || !exists(${CROSS_BINUTILS_PREFIX})
BFLAGS+= -B${WORLDTMP}/usr/bin
.endif
.if ${TARGET} == "arm"
.if ${TARGET_ARCH:Marmv6*} != "" && ${TARGET_CPUTYPE:M*soft*} == ""
TARGET_ABI= gnueabihf
.else
TARGET_ABI= gnueabi
.endif
.endif
.if ${WANT_COMPILER_TYPE} == gcc || \
    (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc)
# GCC requires -isystem and -L when using a cross-compiler.
# --sysroot won't set the header path, and -L is used to ensure the base
# library path is added before the port PREFIX library path.
XCFLAGS+= -isystem ${WORLDTMP}/usr/include -L${WORLDTMP}/usr/lib
# GCC requires -B to find /usr/lib/crti.o when using a cross-compiler
# combined with --sysroot.
XCFLAGS+= -B${WORLDTMP}/usr/lib
# Force using libc++ for external GCC.
# XXX: This should be checking MK_GNUCXX == no
.if ${X_COMPILER_VERSION} >= 40800
XCXXFLAGS+= -isystem ${WORLDTMP}/usr/include/c++/v1 -std=c++11 \
	-nostdinc++
.endif
.elif ${WANT_COMPILER_TYPE} == clang || \
    (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == clang)
TARGET_ABI?= unknown
TARGET_TRIPLE?= ${TARGET_ARCH:C/amd64/x86_64/}-${TARGET_ABI}-freebsd12.0
XCFLAGS+= -target ${TARGET_TRIPLE}
.endif
XCFLAGS+= --sysroot=${WORLDTMP}
.if !empty(BFLAGS)
XCFLAGS+= ${BFLAGS}
.endif

.if ${MK_LIB32} != "no" && (${TARGET_ARCH} == "amd64" || \
    ${TARGET_ARCH} == "powerpc64" || ${TARGET_ARCH:Mmips64*} != "")
LIBCOMPAT= 32
.include "Makefile.libcompat"
.elif ${MK_LIBSOFT} != "no" && ${TARGET_ARCH} == "armv6"
LIBCOMPAT= SOFT
.include "Makefile.libcompat"
.endif

WMAKE= ${WMAKEENV} ${MAKE} ${WORLD_FLAGS} -f Makefile.inc1 DESTDIR=${WORLDTMP}

IMAKEENV= ${CROSSENV}
IMAKE= ${IMAKEENV} ${MAKE} -f Makefile.inc1 \
	${IMAKE_INSTALL} ${IMAKE_MTREE}
.if empty(.MAKEFLAGS:M-n)
IMAKEENV+= PATH=${STRICTTMPPATH}:${INSTALLTMP} \
	LD_LIBRARY_PATH=${INSTALLTMP} \
	PATH_LOCALE=${INSTALLTMP}/locale
IMAKE+= __MAKE_SHELL=${INSTALLTMP}/sh
.else
IMAKEENV+= PATH=${TMPPATH}:${INSTALLTMP}
.endif

.if defined(DB_FROM_SRC)
INSTALLFLAGS+= -N ${.CURDIR}/etc
MTREEFLAGS+= -N ${.CURDIR}/etc
.endif
_INSTALL_DDIR= ${DESTDIR}/${DISTDIR}
INSTALL_DDIR= ${_INSTALL_DDIR:S://:/:g:C:/$::}
.if defined(NO_ROOT)
METALOG?= ${DESTDIR}/${DISTDIR}/METALOG
IMAKE+= -DNO_ROOT METALOG=${METALOG}
INSTALLFLAGS+= -U -M ${METALOG} -D ${INSTALL_DDIR}
MTREEFLAGS+= -W
.endif
.if defined(BUILD_PKGS)
INSTALLFLAGS+= -h sha256
.endif
.if defined(DB_FROM_SRC) || defined(NO_ROOT)
IMAKE_INSTALL= INSTALL="install ${INSTALLFLAGS}"
IMAKE_MTREE= MTREE_CMD="mtree ${MTREEFLAGS}"
.endif

# kernel stage
KMAKEENV= ${WMAKEENV}
KMAKE= ${KMAKEENV} ${MAKE} ${.MAKEFLAGS} ${KERNEL_FLAGS} KERNEL=${INSTKERNNAME}

#
# buildworld
#
# Attempt to rebuild the entire system, with reasonable chance of
# success, regardless of how old your existing system is.
#
_worldtmp: .PHONY
.if ${.CURDIR:C/[^,]//g} != ""
#	The m4 build of sendmail files doesn't like it if ',' is used
#	anywhere in the path of its files.
	@echo
	@echo "*** Error: path to source tree contains a comma ','"
	@echo
	false
.endif
	@echo
	@echo "--------------------------------------------------------------"
	@echo ">>> Rebuilding the temporary build tree"
	@echo "--------------------------------------------------------------"
.if !defined(NO_CLEAN)
	rm -rf ${WORLDTMP}
.if defined(LIBCOMPAT)
	rm -rf ${LIBCOMPATTMP}
.endif
.else
	rm -rf ${WORLDTMP}/legacy/usr/include
.endif
# Dependencies cannot cope with certain source tree changes, particularly
# with respect to removing source files and replacing generated files.
# Handle these cases here in an ad-hoc fashion.
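# (For illustration only: a stale .depend.ptrace.o from before r305012 would
# still reference an assembler source such as lib/libc/<arch>/ptrace.S; the
# egrep pattern '/${f}.[sS]' below keys on exactly that.  The path shown here
# is a hypothetical example.)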
# 20160829 remove stale dependencies for ptrace stub, rewritten in C # in r305012 .for f in ptrace .if exists(${OBJTREE}${.CURDIR}/lib/libc/.depend.${f}.o) @if egrep -q '/${f}.[sS]' \ ${OBJTREE}${.CURDIR}/lib/libc/.depend.${f}.o; then \ echo Removing stale dependencies for ${f} syscall wrappers; \ rm -f ${OBJTREE}${.CURDIR}/lib/libc/.depend.${f}.* \ ${OBJTREE}${.CURDIR}/world32/${.CURDIR}/lib/libc/.depend.${f}.*; \ fi .endif .endfor # 20170523 remove stale generated asm files for functions which are no longer # syscalls after r302092 (pipe) and r318736 (others) .for f in getdents lstat mknod pipe stat .if exists(${OBJTREE}${.CURDIR}/lib/libc/${f}.s) || \ exists(${OBJTREE}${.CURDIR}/lib/libc/${f}.S) @echo Removing stale generated ${f} syscall files @rm -f ${OBJTREE}${.CURDIR}/lib/libc/${f}.* \ ${OBJTREE}${.CURDIR}/lib/libc/.depend.${f}.* \ ${OBJTREE}${.CURDIR}/world32/${.CURDIR}/lib/libc/${f}.* \ ${OBJTREE}${.CURDIR}/world32/${.CURDIR}/lib/libc/.depend.${f}.* .endif .endfor .for _dir in \ lib lib/casper usr legacy/bin legacy/usr mkdir -p ${WORLDTMP}/${_dir} .endfor mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/legacy/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/legacy/usr/include >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${WORLDTMP}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${WORLDTMP}/usr/include >/dev/null ln -sf ${.CURDIR}/sys ${WORLDTMP} .if ${MK_DEBUG_FILES} != "no" # We could instead disable debug files for these build stages mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/legacy/usr/lib >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${WORLDTMP}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/legacy/usr/lib/debug/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${WORLDTMP}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" mkdir -p ${WORLDTMP}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mkdir -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${WORLDTMP}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .for _mtree in ${LOCAL_MTREE} mtree -deU -f ${.CURDIR}/${_mtree} -p ${WORLDTMP} > /dev/null .endfor _legacy: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.1: legacy release compatibility shims" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} legacy _bootstrap-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 1.2: bootstrap tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${BMAKE} bootstrap-tools _cleanobj: .if !defined(NO_CLEAN) @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.1: cleaning up the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} ${CLEANDIR} .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${LIBCOMPATWMAKE} -f Makefile.inc1 ${CLEANDIR} .endif .endif _obj: @echo @echo "--------------------------------------------------------------" @echo ">>> 
stage 2.2: rebuilding the object tree" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} obj _build-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 2.3: build tools" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${TMAKE} build-tools _cross-tools: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3: cross tools" @echo "--------------------------------------------------------------" @rm -f ${.OBJDIR}/compiler-metadata.mk ${_+_}cd ${.CURDIR}; ${XMAKE} cross-tools ${_+_}cd ${.CURDIR}; ${XMAKE} kernel-tools _compiler-metadata: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 3.1: recording compiler metadata" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${WMAKE} compiler-metadata.mk _includes: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.1: building includes" @echo "--------------------------------------------------------------" # Special handling for SUBDIR_OVERRIDE in buildworld as they most likely need # headers from default SUBDIR. Do SUBDIR_OVERRIDE includes last. ${_+_}cd ${.CURDIR}; ${WMAKE} SUBDIR_OVERRIDE= SHARED=symlinks \ MK_INCLUDES=yes includes .if !empty(SUBDIR_OVERRIDE) && make(buildworld) ${_+_}cd ${.CURDIR}; ${WMAKE} MK_INCLUDES=yes SHARED=symlinks includes .endif _libraries: @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.2: building libraries" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; \ ${WMAKE} -DNO_FSCHG MK_HTML=no -DNO_LINT MK_MAN=no \ MK_PROFILE=no MK_TESTS=no MK_TESTS_SUPPORT=${MK_TESTS} libraries everything: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> stage 4.3: building everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; _PARALLEL_SUBDIR_OK=1 ${WMAKE} all WMAKE_TGTS= .if !defined(WORLDFAST) WMAKE_TGTS+= _worldtmp _legacy .if empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= _bootstrap-tools .endif WMAKE_TGTS+= _cleanobj .if !defined(NO_OBJ) WMAKE_TGTS+= _obj .endif WMAKE_TGTS+= _build-tools _cross-tools WMAKE_TGTS+= _compiler-metadata WMAKE_TGTS+= _includes .endif .if !defined(NO_LIBS) WMAKE_TGTS+= _libraries .endif WMAKE_TGTS+= everything .if defined(LIBCOMPAT) && empty(SUBDIR_OVERRIDE) WMAKE_TGTS+= build${libcompat} .endif buildworld: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue .PHONY .ORDER: buildworld_prologue ${WMAKE_TGTS} buildworld_epilogue buildworld_prologue: .PHONY @echo "--------------------------------------------------------------" @echo ">>> World build started on `LC_ALL=C date`" @echo "--------------------------------------------------------------" buildworld_epilogue: .PHONY @echo @echo "--------------------------------------------------------------" @echo ">>> World build completed on `LC_ALL=C date`" @echo "--------------------------------------------------------------" # # We need to have this as a target because the indirection between Makefile # and Makefile.inc1 causes the correct PATH to be used, rather than a # modification of the current environment's PATH. In addition, we need # to quote multiword values. 
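# (Usage sketch, with a placeholder script name: the output of
#	make buildenvvars
# can be prepended to a manual command so it runs with the same WMAKEENV as
# buildworld, e.g.
#	env `make buildenvvars` ./mybuildstep.sh
# mybuildstep.sh is hypothetical.)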
# buildenvvars: .PHONY @echo ${WMAKEENV:Q} ${.MAKE.EXPORTED:@v@$v=\"${$v}\"@} .if ${.TARGETS:Mbuildenv} .if ${.MAKEFLAGS:M-j} .error The buildenv target is incompatible with -j .endif .endif BUILDENV_DIR?= ${.CURDIR} buildenv: .PHONY @echo Entering world for ${TARGET_ARCH}:${TARGET} .if ${BUILDENV_SHELL:M*zsh*} @echo For ZSH you must run: export CPUTYPE=${TARGET_CPUTYPE} .endif @cd ${BUILDENV_DIR} && env ${WMAKEENV} BUILDENV=1 ${BUILDENV_SHELL} \ || true TOOLCHAIN_TGTS= ${WMAKE_TGTS:Neverything:Nbuild${libcompat}} toolchain: ${TOOLCHAIN_TGTS} .PHONY kernel-toolchain: ${TOOLCHAIN_TGTS:N_includes:N_libraries} .PHONY # # installcheck # # Checks to be sure system is ready for installworld/installkernel. # installcheck: _installcheck_world _installcheck_kernel .PHONY _installcheck_world: .PHONY _installcheck_kernel: .PHONY # # Require DESTDIR to be set if installing for a different architecture or # using the user/group database in the source tree. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} || ${TARGET} != ${MACHINE} || \ defined(DB_FROM_SRC) .if !make(distributeworld) _installcheck_world: __installcheck_DESTDIR _installcheck_kernel: __installcheck_DESTDIR __installcheck_DESTDIR: .PHONY .if !defined(DESTDIR) || empty(DESTDIR) @echo "ERROR: Please set DESTDIR!"; \ false .endif .endif .endif .if !defined(DB_FROM_SRC) # # Check for missing UIDs/GIDs. # CHECK_UIDS= auditdistd CHECK_GIDS= audit .if ${MK_SENDMAIL} != "no" CHECK_UIDS+= smmsp CHECK_GIDS+= smmsp .endif .if ${MK_PF} != "no" CHECK_UIDS+= proxy CHECK_GIDS+= proxy authpf .endif .if ${MK_UNBOUND} != "no" CHECK_UIDS+= unbound CHECK_GIDS+= unbound .endif _installcheck_world: __installcheck_UGID __installcheck_UGID: .PHONY .for uid in ${CHECK_UIDS} @if ! `id -u ${uid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${uid} user is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .for gid in ${CHECK_GIDS} @if ! `find / -prune -group ${gid} >/dev/null 2>&1`; then \ echo "ERROR: Required ${gid} group is missing, see /usr/src/UPDATING."; \ false; \ fi .endfor .endif +# +# If installing over the running system (DESTDIR is / or unset) and the install +# includes rescue, try running rescue from the objdir as a sanity check. If +# rescue is not functional (e.g., because it depends on a system call not +# supported by the currently running kernel), abort the installation. +# +.if !make(distributeworld) && ${MK_RESCUE} != "no" && \ + (empty(DESTDIR) || ${DESTDIR} == "/") && empty(BYPASS_INSTALLCHECK_SH) +_installcheck_world: __installcheck_sh_check +__installcheck_sh_check: .PHONY + @if [ "`${OBJTREE}${.CURDIR}/rescue/rescue/rescue sh -c 'echo OK'`" != \ + OK ]; then \ + echo "rescue/sh check failed, installation aborted" >&2; \ + false; \ + fi +.endif # # Required install tools to be saved in a scratch dir for safety. # .if ${MK_ZONEINFO} != "no" _zoneinfo= zic tzsetup .endif ITOOLS= [ awk cap_mkdb cat chflags chmod chown cmp cp \ date echo egrep find grep id install ${_install-info} \ ln make mkdir mtree mv pwd_mkdb \ rm sed services_mkdb sh strip sysctl test true uname wc ${_zoneinfo} \ ${LOCAL_ITOOLS} # Needed for share/man .if ${MK_MAN_UTILS} != "no" ITOOLS+=makewhatis .endif # # distributeworld # # Distributes everything compiled by a `buildworld'. # # installworld # # Installs everything compiled by a 'buildworld'. 
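# (Typical upgrade sequence, for orientation only; see build(7) and UPDATING:
#	make buildworld
#	make buildkernel && make installkernel
#	<reboot to the new kernel>
#	make installworld
# installworld itself performs only the final step.)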
# # Non-base distributions produced by the base system EXTRA_DISTRIBUTIONS= doc .if defined(LIBCOMPAT) EXTRA_DISTRIBUTIONS+= lib${libcompat} .endif .if ${MK_TESTS} != "no" EXTRA_DISTRIBUTIONS+= tests .endif DEBUG_DISTRIBUTIONS= .if ${MK_DEBUG_FILES} != "no" DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,} .endif MTREE_MAGIC?= mtree 2.0 distributeworld installworld stageworld: _installcheck_world .PHONY mkdir -p ${INSTALLTMP} progs=$$(for prog in ${ITOOLS}; do \ if progpath=`which $$prog`; then \ echo $$progpath; \ else \ echo "Required tool $$prog not found in PATH." >&2; \ exit 1; \ fi; \ done); \ libs=$$(ldd -f "%o %p\n" -f "%o %p\n" $$progs 2>/dev/null | sort -u | \ while read line; do \ set -- $$line; \ if [ "$$2 $$3" != "not found" ]; then \ echo $$2; \ else \ echo "Required library $$1 not found." >&2; \ exit 1; \ fi; \ done); \ cp $$libs $$progs ${INSTALLTMP} cp -R $${PATH_LOCALE:-"/usr/share/locale"} ${INSTALLTMP}/locale .if defined(NO_ROOT) -mkdir -p ${METALOG:H} echo "#${MTREE_MAGIC}" > ${METALOG} .endif .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} -mkdir ${DESTDIR}/${DISTDIR}/${dist} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${DESTDIR}/${DISTDIR}/${dist} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib >/dev/null .endif .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/usr >/dev/null .endif .endif .if ${MK_TESTS} != "no" && ${dist} == "tests" -mkdir -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}${TESTSBASE} >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${DESTDIR}/${DISTDIR}/${dist}/usr/lib/debug/${TESTSBASE} >/dev/null .endif .endif .if defined(NO_ROOT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.root.dist | \ sed -e 's#^\./#./${dist}/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.usr.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.include.dist | \ sed -e 's#^\./#./${dist}/usr/include/#' >> ${METALOG} .if defined(LIBCOMPAT) ${IMAKEENV} mtree -C -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist | \ sed -e 's#^\./#./${dist}/usr/#' >> ${METALOG} .endif .endif .endfor -mkdir ${DESTDIR}/${DISTDIR}/base ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ METALOG=${METALOG} ${IMAKE_INSTALL} ${IMAKE_MTREE} \ DISTBASE=/base DESTDIR=${DESTDIR}/${DISTDIR}/base \ LOCAL_MTREE=${LOCAL_MTREE:Q} distrib-dirs .endif ${_+_}cd ${.CURDIR}; ${IMAKE} re${.TARGET:S/world$//}; \ ${IMAKEENV} rm -rf ${INSTALLTMP} .if make(distributeworld) .for dist in ${EXTRA_DISTRIBUTIONS} find ${DESTDIR}/${DISTDIR}/${dist} -mindepth 1 -type d -empty -delete .endfor .if defined(NO_ROOT) .for dist in base ${EXTRA_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. 
cd ${DESTDIR}/${DISTDIR}; \ find ./${dist} | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.meta .endfor .for dist in ${DEBUG_DISTRIBUTIONS} @# For each file that exists in this dist, print the corresponding @# line from the METALOG. This relies on the fact that @# a line containing only the filename will sort immediately before @# the relevant mtree line. cd ${DESTDIR}/${DISTDIR}; \ find ./${dist}/usr/lib/debug | sort -u ${METALOG} - | \ awk 'BEGIN { print "#${MTREE_MAGIC}" } !/ type=/ { file = $$1 } / type=/ { if ($$1 == file) { sub(/^\.\/${dist}\//, "./"); print } }' > \ ${DESTDIR}/${DISTDIR}/${dist}.debug.meta .endfor .endif .endif packageworld: .PHONY .for dist in base ${EXTRA_DISTRIBUTIONS} .if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug \ @${DESTDIR}/${DISTDIR}/${dist}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - --exclude usr/lib/debug . | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}.txz .endif .endfor .for dist in ${DEBUG_DISTRIBUTIONS} . if defined(NO_ROOT) ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvf - @${DESTDIR}/${DISTDIR}/${dist}.debug.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . else ${_+_}cd ${DESTDIR}/${DISTDIR}/${dist}; \ tar cvLf - usr/lib/debug | \ ${XZ_CMD} > ${PACKAGEDIR}/${dist}-dbg.txz . endif .endfor # # reinstall # # If you have a build server, you can NFS mount the source and obj directories # and do a 'make reinstall' on the *client* to install new binaries from the # most recent server build. # restage reinstall: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Making hierarchy" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} hierarchy .if make(restage) @echo "--------------------------------------------------------------" @echo ">>> Making distribution" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 \ LOCAL_MTREE=${LOCAL_MTREE:Q} distribution .endif @echo @echo "--------------------------------------------------------------" @echo ">>> Installing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 install${libcompat} .endif redistribute: .MAKE .PHONY @echo "--------------------------------------------------------------" @echo ">>> Distributing everything" @echo "--------------------------------------------------------------" ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute .if defined(LIBCOMPAT) ${_+_}cd ${.CURDIR}; ${MAKE} -f Makefile.inc1 distribute${libcompat} \ DISTRIBUTION=lib${libcompat} .endif distrib-dirs distribution: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${CROSSENV} PATH=${TMPPATH} ${MAKE} \ ${IMAKE_INSTALL} ${IMAKE_MTREE} METALOG=${METALOG} ${.TARGET} .if make(distribution) ${_+_}cd ${.CURDIR}; ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} -f Makefile.inc1 ${IMAKE_INSTALL} \ METALOG=${METALOG} MK_TESTS=no installconfig .endif # # buildkernel and installkernel # # Which kernels to build and/or install is specified by setting # KERNCONF. If not defined a GENERIC kernel is built/installed. 
# Only config files that exist (for the given TARGET) are used
# for building kernels, and only the first of these is designated
# as the one being installed.
#
# Note that we have to use TARGET instead of TARGET_ARCH when
# we're in kernel-land.  Since only TARGET_ARCH is (expected) to
# be set to cross-build, we have to make sure TARGET is set
# properly.

.if defined(KERNFAST)
NO_KERNELCLEAN=	t
NO_KERNELCONFIG= t
NO_KERNELOBJ= t
# Shortcut for KERNCONF=Blah -DKERNFAST is now KERNFAST=Blah
.if !defined(KERNCONF) && ${KERNFAST} != "1"
KERNCONF=${KERNFAST}
.endif
.endif
.if ${TARGET_ARCH} == "powerpc64"
KERNCONF?= GENERIC64
.else
KERNCONF?= GENERIC
.endif
INSTKERNNAME?= kernel

KERNSRCDIR?= ${.CURDIR}/sys
KRNLCONFDIR= ${KERNSRCDIR}/${TARGET}/conf
KRNLOBJDIR= ${OBJTREE}${KERNSRCDIR}
KERNCONFDIR?= ${KRNLCONFDIR}

BUILDKERNELS=
INSTALLKERNEL=
.if defined(NO_INSTALLKERNEL)
# All of the BUILDKERNELS loops start at index 1.
BUILDKERNELS+= dummy
.endif
.for _kernel in ${KERNCONF}
.if exists(${KERNCONFDIR}/${_kernel})
BUILDKERNELS+= ${_kernel}
.if empty(INSTALLKERNEL) && !defined(NO_INSTALLKERNEL)
INSTALLKERNEL= ${_kernel}
.endif
.endif
.endfor

${WMAKE_TGTS:N_worldtmp:Nbuild${libcompat}} ${.ALLTARGETS:M_*:N_worldtmp}: .MAKE .PHONY

#
# buildkernel
#
# Builds all kernels defined by BUILDKERNELS.
#
buildkernel: .MAKE .PHONY
.if empty(BUILDKERNELS:Ndummy)
	@echo "ERROR: Missing kernel configuration file(s) (${KERNCONF})."; \
	false
.endif
	@echo
.for _kernel in ${BUILDKERNELS:Ndummy}
	@echo "--------------------------------------------------------------"
	@echo ">>> Kernel build for ${_kernel} started on `LC_ALL=C date`"
	@echo "--------------------------------------------------------------"
	@echo "===> ${_kernel}"
	mkdir -p ${KRNLOBJDIR}
.if !defined(NO_KERNELCONFIG)
	@echo
	@echo "--------------------------------------------------------------"
	@echo ">>> stage 1: configuring the kernel"
	@echo "--------------------------------------------------------------"
	cd ${KRNLCONFDIR}; \
		PATH=${TMPPATH} \
		config ${CONFIGARGS} -d ${KRNLOBJDIR}/${_kernel} \
			-I '${KERNCONFDIR}' '${KERNCONFDIR}/${_kernel}'
.endif
.if !defined(NO_CLEAN) && !defined(NO_KERNELCLEAN)
	@echo
	@echo "--------------------------------------------------------------"
	@echo ">>> stage 2.1: cleaning up the object tree"
	@echo "--------------------------------------------------------------"
	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} ${CLEANDIR}
.endif
.if !defined(NO_KERNELOBJ)
	@echo
	@echo "--------------------------------------------------------------"
	@echo ">>> stage 2.2: rebuilding the object tree"
	@echo "--------------------------------------------------------------"
	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} obj
.endif
	@echo
	@echo "--------------------------------------------------------------"
	@echo ">>> stage 2.3: build tools"
	@echo "--------------------------------------------------------------"
	${_+_}cd ${.CURDIR}; ${KTMAKE} kernel-tools
	@echo
	@echo "--------------------------------------------------------------"
	@echo ">>> stage 3.1: building everything"
	@echo "--------------------------------------------------------------"
	${_+_}cd ${KRNLOBJDIR}/${_kernel}; ${KMAKE} all -DNO_MODULES_OBJ
	@echo "--------------------------------------------------------------"
	@echo ">>> Kernel build for ${_kernel} completed on `LC_ALL=C date`"
	@echo "--------------------------------------------------------------"
.endfor

NO_INSTALLEXTRAKERNELS?= yes

#
# installkernel, etc.
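# (Sketch, hypothetical config names: with KERNCONF="GENERIC MYKERNEL" and
# NO_INSTALLEXTRAKERNELS=no, GENERIC is installed under /boot/${INSTKERNNAME}
# and MYKERNEL under /boot/${INSTKERNNAME}.MYKERNEL by the loops below.)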
# # Install the kernel defined by INSTALLKERNEL # installkernel installkernel.debug \ reinstallkernel reinstallkernel.debug: _installcheck_kernel .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${INSTALLKERNEL}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME} ${.TARGET:S/kernel//} .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} @echo "--------------------------------------------------------------" @echo ">>> Installing kernel ${_kernel}" @echo "--------------------------------------------------------------" cd ${KRNLOBJDIR}/${_kernel}; \ ${CROSSENV} PATH=${TMPPATH} \ ${MAKE} ${IMAKE_INSTALL} KERNEL=${INSTKERNNAME}.${_kernel} ${.TARGET:S/kernel//} .endfor .endif distributekernel distributekernel.debug: .PHONY .if !defined(NO_INSTALLKERNEL) .if empty(INSTALLKERNEL) @echo "ERROR: No kernel \"${KERNCONF}\" to install."; \ false .endif mkdir -p ${DESTDIR}/${DISTDIR} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.premeta .endif cd ${KRNLOBJDIR}/${INSTALLKERNEL}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} KERNEL=${INSTKERNNAME} \ DESTDIR=${INSTALL_DDIR}/kernel \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e 's|^./kernel|.|' ${DESTDIR}/${DISTDIR}/kernel.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.meta .endif .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if defined(NO_ROOT) @echo "#${MTREE_MAGIC}" > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta .endif cd ${KRNLOBJDIR}/${_kernel}; \ ${IMAKEENV} ${IMAKE_INSTALL:S/METALOG/kernel.${_kernel}.premeta/} \ ${IMAKE_MTREE} PATH=${TMPPATH} ${MAKE} \ KERNEL=${INSTKERNNAME}.${_kernel} \ DESTDIR=${INSTALL_DDIR}/kernel.${_kernel} \ ${.TARGET:S/distributekernel/install/} .if defined(NO_ROOT) @sed -e "s|^./kernel.${_kernel}|.|" \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.premeta > \ ${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta .endif .endfor .endif packagekernel: .PHONY .if defined(NO_ROOT) .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' \ @${DESTDIR}/${DISTDIR}/kernel.${_kernel}.meta | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .else .if !defined(NO_INSTALLKERNEL) cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.txz .endif cd ${DESTDIR}/${DISTDIR}/kernel; \ tar cvf - --include '*/*/*.debug' $$(eval find .) 
| \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel-dbg.txz .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --exclude '*.debug' . | \ ${XZ_CMD} > ${PACKAGEDIR}/kernel.${_kernel}.txz cd ${DESTDIR}/${DISTDIR}/kernel.${_kernel}; \ tar cvf - --include '*/*/*.debug' $$(eval find .) | \ ${XZ_CMD} > ${DESTDIR}/${DISTDIR}/kernel.${_kernel}-dbg.txz .endfor .endif .endif stagekernel: .PHONY ${_+_}${MAKE} -C ${.CURDIR} ${.MAKEFLAGS} distributekernel PORTSDIR?= /usr/ports WSTAGEDIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/${TARGET}.${TARGET_ARCH}/worldstage KSTAGEDIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/${TARGET}.${TARGET_ARCH}/kernelstage REPODIR?= ${MAKEOBJDIRPREFIX}${.CURDIR}/repo PKGSIGNKEY?= # empty .ORDER: stage-packages create-packages .ORDER: create-packages create-world-packages .ORDER: create-packages create-kernel-packages .ORDER: create-packages sign-packages _pkgbootstrap: .PHONY .if !exists(${LOCALBASE}/sbin/pkg) @env ASSUME_ALWAYS_YES=YES pkg bootstrap .endif packages: .PHONY ${_+_}${MAKE} -C ${.CURDIR} PKG_VERSION=${PKG_VERSION} real-packages package-pkg: .PHONY rm -rf /tmp/ports.${TARGET} || : env ${WMAKEENV:Q} SRCDIR=${.CURDIR} PORTSDIR=${PORTSDIR} REVISION=${_REVISION} \ PKG_CMD=${PKG_CMD} PKG_VERSION=${PKG_VERSION} REPODIR=${REPODIR} \ WSTAGEDIR=${WSTAGEDIR} \ sh ${.CURDIR}/release/scripts/make-pkg-package.sh real-packages: stage-packages create-packages sign-packages .PHONY stage-packages: .PHONY @mkdir -p ${REPODIR} ${WSTAGEDIR} ${KSTAGEDIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} -DNO_ROOT -B stageworld ; \ ${MAKE} DESTDIR=${KSTAGEDIR} -DNO_ROOT -B stagekernel create-packages: _pkgbootstrap .PHONY @mkdir -p ${REPODIR} ${_+_}@cd ${.CURDIR}; \ ${MAKE} DESTDIR=${WSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} create-world-packages ; \ ${MAKE} DESTDIR=${KSTAGEDIR} \ PKG_VERSION=${PKG_VERSION} DISTDIR=kernel \ create-kernel-packages create-world-packages: _pkgbootstrap .PHONY @rm -f ${WSTAGEDIR}/*.plist 2>/dev/null || : @cd ${WSTAGEDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ ${WSTAGEDIR}/METALOG @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ sh ${SRCDIR}/release/packages/generate-ucl.sh -o $${pkgname} \ -s ${SRCDIR} -u ${WSTAGEDIR}/$${pkgname}.ucl ; \ done @for plist in ${WSTAGEDIR}/*.plist; do \ plist=$${plist##*/} ; \ pkgname=$${plist%.plist} ; \ awk -F\" ' \ /^name/ { printf("===> Creating %s-", $$2); next } \ /^version/ { print $$2; next } \ ' ${WSTAGEDIR}/$${pkgname}.ucl ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${WSTAGEDIR}/$${pkgname}.ucl \ -p ${WSTAGEDIR}/$${pkgname}.plist \ -r ${WSTAGEDIR} \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} ; \ done create-kernel-packages: _pkgbootstrap .PHONY .if exists(${KSTAGEDIR}/kernel.meta) .for flavor in "" -debug @cd ${KSTAGEDIR}/${DISTDIR} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${INSTALLKERNEL} \ ${KSTAGEDIR}/kernel.meta ; \ cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${INSTALLKERNEL:tl}${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${INSTALLKERNEL} kernel ${flavor}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ 
${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl \ -p ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.plist \ -r ${KSTAGEDIR}/${DISTDIR} \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} .endfor .endif .if ${BUILDKERNELS:[#]} > 1 && ${NO_INSTALLEXTRAKERNELS} != "yes" .for _kernel in ${BUILDKERNELS:[2..-1]} .if exists(${KSTAGEDIR}/kernel.${_kernel}.meta) .for flavor in "" -debug @cd ${KSTAGEDIR}/kernel.${_kernel} ; \ awk -f ${SRCDIR}/release/scripts/mtree-to-plist.awk \ -v kernel=yes -v _kernconf=${_kernel} \ ${KSTAGEDIR}/kernel.${_kernel}.meta ; \ cap_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VCAP_MKDB_ENDIAN` ; \ pwd_arg=`cd ${SRCDIR}/etc ; ${MAKE} -VPWD_MKDB_ENDIAN` ; \ sed -e "s/%VERSION%/${PKG_VERSION}/" \ -e "s/%PKGNAME%/kernel-${_kernel:tl}${flavor}/" \ -e "s/%COMMENT%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%DESC%/FreeBSD ${_kernel} kernel ${flavor}/" \ -e "s/%CAP_MKDB_ENDIAN%/$${cap_arg}/g" \ -e "s/%PWD_MKDB_ENDIAN%/$${pwd_arg}/g" \ ${SRCDIR}/release/packages/kernel.ucl \ > ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ awk -F\" ' \ /name/ { printf("===> Creating %s-", $$2); next } \ /version/ {print $$2; next } ' \ ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh -o ALLOW_BASE_SHLIBS=yes \ create -M ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl \ -p ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.plist \ -r ${KSTAGEDIR}/kernel.${_kernel} \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} .endfor .endif .endfor .endif sign-packages: _pkgbootstrap .PHONY @[ -L "${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest" ] && \ unlink ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/latest ; \ ${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh repo \ -o ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI)/${PKG_VERSION} \ ${PKGSIGNKEY} ; \ cd ${REPODIR}/$$(${PKG_CMD} -o ABI_FILE=${WSTAGEDIR}/bin/sh config ABI); \ ln -s ${PKG_VERSION} latest # # # checkworld # # Run test suite on installed world. # checkworld: .PHONY @if [ ! -x "${LOCALBASE}/bin/kyua" ]; then \ echo "You need kyua (devel/kyua) to run the test suite." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}PATH="$$PATH:${LOCALBASE}/bin" kyua test -k ${TESTSBASE}/Kyuafile # # # doxygen # # Build the API documentation with doxygen # doxygen: .PHONY @if [ ! -x "${LOCALBASE}/bin/doxygen" ]; then \ echo "You need doxygen (devel/doxygen) to generate the API documentation of the kernel." | /usr/bin/fmt; \ exit 1; \ fi ${_+_}cd ${.CURDIR}/tools/kerneldoc/subsys; ${MAKE} obj all # # update # # Update the source tree(s), by running svn/svnup to update to the # latest copy. 
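# (Example invocation, assuming a Subversion checkout of the tree:
#	make update SVN_UPDATE=yes
# runs `${SVN} update -r HEAD` (the default SVNFLAGS) in ${.CURDIR}.)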
#
update: .PHONY
.if defined(SVN_UPDATE)
	@echo "--------------------------------------------------------------"
	@echo ">>> Updating ${.CURDIR} using Subversion"
	@echo "--------------------------------------------------------------"
	@(cd ${.CURDIR}; ${SVN} update ${SVNFLAGS})
.endif

#
# ------------------------------------------------------------------------
#
# From here onwards are utility targets used by the 'make world' and
# related targets.  If your 'world' breaks, you may like to try to fix
# the problem and manually run the following targets to attempt to
# complete the build.  Beware, this is *not* guaranteed to work, you
# need to have a pretty good grip on the current state of the system
# to attempt to manually finish it.  If in doubt, 'make world' again.
#

#
# legacy: Build compatibility shims for the next three targets. This is a
# minimal set of tools and shims necessary to compensate for older systems
# which don't have the APIs required by the targets built in bootstrap-tools,
# build-tools or cross-tools.
#

# ELF Tool Chain libraries are needed for ELF tools and dtrace tools.
# r296685 fix cross-endian objcopy
.if ${BOOTSTRAPPING} < 1100102
_elftoolchain_libs= lib/libelf lib/libdwarf
.endif

legacy: .PHONY
# Temporary special case for automatically detecting the clang compiler issue
# Note: 9.x didn't have FreeBSD_version bumps often enough, so you may need to
# set BOOTSTRAPPING to 0 if your stable/9 tree post-dates r286035 but is before
# the version bump in r296219 (from July 29, 2015 -> Feb 29, 2016).
.if ${BOOTSTRAPPING} != 0 && \
    ${WANT_COMPILER_TYPE} == "clang" && ${COMPILER_TYPE} == "clang" && ${COMPILER_VERSION} < 30601
.if ${BOOTSTRAPPING} > 10000000 && ${BOOTSTRAPPING} < 1002501
	@echo "ERROR: Source upgrades from stable/10 prior to r286033 are not supported."; false
.elif ${BOOTSTRAPPING} > 9000000 && ${BOOTSTRAPPING} < 903509
	@echo "ERROR: Source upgrades from stable/9 prior to r286035 are not supported."; false
.endif
.endif
.if ${BOOTSTRAPPING} < ${MINIMUM_SUPPORTED_OSREL} && ${BOOTSTRAPPING} != 0
	@echo "ERROR: Source upgrades from versions prior to ${MINIMUM_SUPPORTED_REL} are not supported."; \
	false
.endif
.for _tool in tools/build ${_elftoolchain_libs}
	${_+_}@${ECHODIR} "===> ${_tool} (obj,includes,all,install)"; \
		cd ${.CURDIR}/${_tool}; \
		if [ -z "${NO_OBJ}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \
		${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy includes; \
		${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no all; \
		${MAKE} DIRPRFX=${_tool}/ MK_INCLUDES=no \
		    DESTDIR=${MAKEOBJDIRPREFIX}/legacy install
.endfor

#
# bootstrap-tools: Build tools needed for compatibility.  These are binaries
# that are built to build other binaries in the system.  However, the focus
# of these binaries is usually quite narrow.  Bootstrap tools use the host's
# compiler and libraries, augmented by -legacy.
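# (For orientation, a hypothetical host: on a 10.3 host OSRELDATE is roughly
# 1003000, so version-gated tools below such as ${_nmtree} (< 1100093) are
# rebuilt, while blocks whose threshold the host already meets are skipped.)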
# _bt= _bootstrap-tools .if ${MK_GAMES} != "no" _strfile= usr.bin/fortune/strfile .endif .if ${MK_GCC} != "no" && ${MK_CXX} != "no" _gperf= gnu/usr.bin/gperf .endif .if ${MK_VT} != "no" _vtfontcvt= usr.bin/vtfontcvt .endif .if ${BOOTSTRAPPING} < 1000033 _libopenbsd= lib/libopenbsd _m4= usr.bin/m4 _lex= usr.bin/lex ${_bt}-usr.bin/m4: ${_bt}-lib/libopenbsd ${_bt}-usr.bin/lex: ${_bt}-usr.bin/m4 .endif # r245440 mtree -N support added # r313404 requires sha384.h for libnetbsd, added to libmd in r292782 .if ${BOOTSTRAPPING} < 1100093 _nmtree= lib/libmd \ lib/libnetbsd \ usr.sbin/nmtree ${_bt}-lib/libnetbsd: ${_bt}-lib/libmd ${_bt}-usr.sbin/nmtree: ${_bt}-lib/libnetbsd .endif # r246097: log addition login.conf.db, passwd, pwd.db, and spwd.db with cat -l .if ${BOOTSTRAPPING} < 1000027 _cat= bin/cat .endif # r277259 crunchide: Correct 64-bit section header offset # r281674 crunchide: always include both 32- and 64-bit ELF support .if ${BOOTSTRAPPING} < 1100078 _crunchide= usr.sbin/crunch/crunchide .endif # r285986 crunchgen: use STRIPBIN rather than STRIP # 1100113: Support MK_AUTO_OBJ # 1200006: META_MODE fixes .if ${BOOTSTRAPPING} < 1100078 || \ (${MK_AUTO_OBJ} == "yes" && ${BOOTSTRAPPING} < 1100114) || \ (${MK_META_MODE} == "yes" && ${BOOTSTRAPPING} < 1200006) _crunchgen= usr.sbin/crunch/crunchgen .endif # r296926 -P keymap search path, MFC to stable/10 in r298297 .if ${BOOTSTRAPPING} < 1003501 || \ (${BOOTSTRAPPING} >= 1100000 && ${BOOTSTRAPPING} < 1100103) _kbdcontrol= usr.sbin/kbdcontrol .endif _yacc= lib/liby \ usr.bin/yacc ${_bt}-usr.bin/yacc: ${_bt}-lib/liby .if ${MK_BSNMP} != "no" _gensnmptree= usr.sbin/bsnmpd/gensnmptree .endif # We need to build tblgen when we're building clang or lld, either as # bootstrap tools, or as part of the normal build. .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_CLANG} != "no" || \ ${MK_LLD_BOOTSTRAP} != "no" || ${MK_LLD} != "no" _clang_tblgen= \ lib/clang/libllvmminimal \ usr.bin/clang/llvm-tblgen \ usr.bin/clang/clang-tblgen ${_bt}-usr.bin/clang/clang-tblgen: ${_bt}-lib/clang/libllvmminimal ${_bt}-usr.bin/clang/llvm-tblgen: ${_bt}-lib/clang/libllvmminimal .endif # Default to building the GPL DTC, but build the BSDL one if users explicitly # request it. _dtc= usr.bin/dtc .if ${MK_GPL_DTC} != "no" _dtc= gnu/usr.bin/dtc .endif .if ${MK_KERBEROS} != "no" _kerberos5_bootstrap_tools= \ kerberos5/tools/make-roken \ kerberos5/lib/libroken \ kerberos5/lib/libvers \ kerberos5/tools/asn1_compile \ kerberos5/tools/slc \ usr.bin/compile_et .ORDER: ${_kerberos5_bootstrap_tools:C/^/${_bt}-/g} .endif # r283777 makewhatis(1) replaced with mandoc version which builds a database. _libopenbsd?= lib/libopenbsd _makewhatis= usr.bin/mandoc ${_bt}-usr.bin/mandoc: ${_bt}-lib/libopenbsd bootstrap-tools: .PHONY # Please document (add comment) why something is in 'bootstrap-tools'. # Try to bound the building of each bootstrap tool to just the # FreeBSD versions that need the tool built at this stage of the build.
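# An illustrative, hypothetical version-bounded entry following the rule
# above would look like:
#
#	# rXXXXXX description of what the host may be missing
#	.if ${BOOTSTRAPPING} < 1100093
#	_mytool=	usr.bin/mytool
#	.endif
#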
.for _tool in \ ${_clang_tblgen} \ ${_kerberos5_bootstrap_tools} \ ${_strfile} \ ${_gperf} \ ${_dtc} \ ${_cat} \ ${_kbdcontrol} \ usr.bin/lorder \ ${_libopenbsd} \ ${_makewhatis} \ usr.bin/rpcgen \ ${_yacc} \ ${_m4} \ ${_lex} \ usr.bin/xinstall \ ${_gensnmptree} \ usr.sbin/config \ ${_crunchide} \ ${_crunchgen} \ ${_nmtree} \ ${_vtfontcvt} \ usr.bin/localedef ${_bt}-${_tool}: .PHONY .MAKE ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJ}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX}/legacy install bootstrap-tools: ${_bt}-${_tool} .endfor # # build-tools: Build special purpose build tools # .if !defined(NO_SHARE) _share= share/syscons/scrnmaps .endif .if ${MK_GCC} != "no" _gcc_tools= gnu/usr.bin/cc/cc_tools .endif .if ${MK_RESCUE} != "no" # rescue includes programs that have build-tools targets _rescue=rescue/rescue .endif .for _tool in \ bin/csh \ bin/sh \ ${LOCAL_TOOL_DIRS} \ lib/ncurses/ncurses \ lib/ncurses/ncursesw \ ${_rescue} \ ${_share} \ usr.bin/awk \ lib/libmagic \ usr.bin/mkesdb_static \ usr.bin/mkcsmapper_static \ usr.bin/vi/catalog build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,build-tools)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJ}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ ${MAKE} DIRPRFX=${_tool}/ build-tools build-tools: build-tools_${_tool} .endfor .for _tool in \ ${_gcc_tools} build-tools_${_tool}: .PHONY ${_+_}@${ECHODIR} "===> ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJ}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ ${MAKE} DIRPRFX=${_tool}/ all build-tools: build-tools_${_tool} .endfor # # kernel-tools: Build kernel-building tools # kernel-tools: .PHONY mkdir -p ${MAKEOBJDIRPREFIX}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${MAKEOBJDIRPREFIX}/usr >/dev/null # # cross-tools: All the tools needed to build the rest of the system after # we get done with the earlier stages. It is the last set of tools needed # to begin building the target binaries. # .if ${TARGET_ARCH} != ${MACHINE_ARCH} .if ${TARGET_ARCH} == "amd64" || ${TARGET_ARCH} == "i386" _btxld= usr.sbin/btxld .endif .endif # Rebuild ctfconvert and ctfmerge to avoid difficult-to-diagnose failures # resulting from missing bug fixes or ELF Toolchain updates. .if ${MK_CDDL} != "no" _dtrace_tools= cddl/lib/libctf cddl/usr.bin/ctfconvert \ cddl/usr.bin/ctfmerge .endif # If we're given an XAS, don't build binutils. .if ${XAS:M/*} == "" .if ${MK_BINUTILS_BOOTSTRAP} != "no" _binutils= gnu/usr.bin/binutils .endif .if ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" _elftctools= lib/libelftc \ lib/libpe \ usr.bin/elfcopy \ usr.bin/nm \ usr.bin/size \ usr.bin/strings # These are not required by the build, but can be useful for developers who # cross-build on a FreeBSD 10 host: _elftctools+= usr.bin/addr2line .endif .elif ${TARGET_ARCH} != ${MACHINE_ARCH} && ${MK_ELFTOOLCHAIN_BOOTSTRAP} != "no" # If cross-building with an external binutils we still need to build strip for # the target (for at least crunchide). 
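# For example (an illustrative command line; the tool paths are
# hypothetical), a cross build driven by an external assembler and compiler
# would look like:
#
#	make buildworld TARGET=arm TARGET_ARCH=armv6 \
#	    XAS=/usr/local/bin/arm-as XCC=/usr/local/bin/clang
#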
_elftctools= lib/libelftc \ lib/libpe \ usr.bin/elfcopy .endif .if ${MK_CLANG_BOOTSTRAP} != "no" _clang= usr.bin/clang .endif .if ${MK_LLD_BOOTSTRAP} != "no" _lld= usr.bin/clang/lld .endif .if ${MK_CLANG_BOOTSTRAP} != "no" || ${MK_LLD_BOOTSTRAP} != "no" _clang_libs= lib/clang .endif .if ${MK_GCC_BOOTSTRAP} != "no" _gcc= gnu/usr.bin/cc .endif .if ${MK_USB} != "no" _usb_tools= sys/boot/usb/tools .endif cross-tools: .MAKE .PHONY .for _tool in \ ${LOCAL_XTOOL_DIRS} \ ${_clang_libs} \ ${_clang} \ ${_lld} \ ${_binutils} \ ${_elftctools} \ ${_dtrace_tools} \ ${_gcc} \ ${_btxld} \ ${_usb_tools} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJ}" ]; then ${MAKE} DIRPRFX=${_tool}/ obj; fi; \ ${MAKE} DIRPRFX=${_tool}/ all; \ ${MAKE} DIRPRFX=${_tool}/ DESTDIR=${MAKEOBJDIRPREFIX} install .endfor NXBDESTDIR= ${OBJTREE}/nxb-bin NXBENV= MAKEOBJDIRPREFIX=${OBJTREE}/nxb \ TOOLS_PREFIX= \ INSTALL="sh ${.CURDIR}/tools/install.sh" \ PATH=${PATH}:${OBJTREE}/gperf_for_gcc/usr/bin NXBMAKE= ${NXBENV} ${MAKE} \ LLVM_TBLGEN=${NXBDESTDIR}/usr/bin/llvm-tblgen \ CLANG_TBLGEN=${NXBDESTDIR}/usr/bin/clang-tblgen \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} \ MK_GDB=no MK_TESTS=no \ SSP_CFLAGS= \ MK_HTML=no NO_LINT=yes MK_MAN=no \ -DNO_PIC MK_PROFILE=no -DNO_SHARED \ -DNO_CPU_CFLAGS MK_WARNS=no MK_CTF=no \ MK_CLANG_EXTRAS=no MK_CLANG_FULL=no \ MK_LLDB=no MK_DEBUG_FILES=no # native-xtools is the current target for qemu-user cross builds of ports # via poudriere and the imgact_binmisc kernel module. # For non-clang enabled targets that are still using the in tree gcc # we must build a gperf binary for one instance of its Makefiles. On # clang-enabled systems, the gperf binary is obsolete. native-xtools: .PHONY .if ${MK_GCC_BOOTSTRAP} != "no" mkdir -p ${OBJTREE}/gperf_for_gcc/usr/bin ${_+_}@${ECHODIR} "===> ${_gperf} (obj,all,install)"; \ cd ${.CURDIR}/${_gperf}; \ if [ -z "${NO_OBJ}" ]; then ${NXBMAKE} DIRPRFX=${_gperf}/ obj; fi; \ ${NXBMAKE} DIRPRFX=${_gperf}/ all; \ ${NXBMAKE} DIRPRFX=${_gperf}/ DESTDIR=${OBJTREE}/gperf_for_gcc install .endif mkdir -p ${NXBDESTDIR}/bin ${NXBDESTDIR}/sbin ${NXBDESTDIR}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${NXBDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${NXBDESTDIR}/usr/include >/dev/null .if ${MK_DEBUG_FILES} != "no" mtree -deU -f ${.CURDIR}/etc/mtree/BSD.debug.dist \ -p ${NXBDESTDIR}/usr/lib >/dev/null .endif .for _tool in \ bin/cat \ bin/chmod \ bin/cp \ bin/csh \ bin/echo \ bin/expr \ bin/hostname \ bin/ln \ bin/ls \ bin/mkdir \ bin/mv \ bin/ps \ bin/realpath \ bin/rm \ bin/rmdir \ bin/sh \ bin/sleep \ ${_clang_tblgen} \ usr.bin/ar \ ${_binutils} \ ${_elftctools} \ ${_gcc} \ ${_gcc_tools} \ ${_clang_libs} \ ${_clang} \ sbin/md5 \ sbin/sysctl \ usr.bin/diff \ usr.bin/awk \ usr.bin/basename \ usr.bin/bmake \ usr.bin/bzip2 \ usr.bin/cmp \ usr.bin/dirname \ usr.bin/env \ usr.bin/fetch \ usr.bin/find \ usr.bin/grep \ usr.bin/gzip \ usr.bin/id \ usr.bin/lex \ usr.bin/limits \ usr.bin/lorder \ usr.bin/mktemp \ usr.bin/mt \ usr.bin/patch \ usr.bin/readelf \ usr.bin/sed \ usr.bin/sort \ usr.bin/tar \ usr.bin/touch \ usr.bin/tr \ usr.bin/true \ usr.bin/uniq \ usr.bin/unzip \ usr.bin/xargs \ usr.bin/xinstall \ usr.bin/xz \ usr.bin/yacc \ usr.sbin/chown ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJ}" ]; then ${NXBMAKE} DIRPRFX=${_tool}/ obj; fi; \ ${NXBMAKE} DIRPRFX=${_tool}/ all; \ ${NXBMAKE} DIRPRFX=${_tool}/ DESTDIR=${NXBDESTDIR} 
install .endfor # # hierarchy - ensure that all the needed directories are present # hierarchy hier: .MAKE .PHONY ${_+_}cd ${.CURDIR}/etc; ${HMAKE} distrib-dirs # # libraries - build all libraries, and install them under ${DESTDIR}. # # The list of libraries with dependents (${_prebuild_libs}) and their # interdependencies (__L) are built automatically by the # ${.CURDIR}/tools/make_libdeps.sh script. # libraries: .MAKE .PHONY ${_+_}cd ${.CURDIR}; \ ${MAKE} -f Makefile.inc1 _prereq_libs; \ ${MAKE} -f Makefile.inc1 _startup_libs; \ ${MAKE} -f Makefile.inc1 _prebuild_libs; \ ${MAKE} -f Makefile.inc1 _generic_libs # # static libgcc.a prerequisite for shared libc # _prereq_libs= lib/libcompiler_rt .if ${MK_SSP} != "no" _prereq_libs+= gnu/lib/libssp/libssp_nonshared .endif # These dependencies are not automatically generated: # # gnu/lib/csu, gnu/lib/libgcc, lib/csu and lib/libc must be built before # all shared libraries for ELF. # _startup_libs= gnu/lib/csu _startup_libs+= lib/csu _startup_libs+= lib/libcompiler_rt _startup_libs+= lib/libc _startup_libs+= lib/libc_nonshared .if ${MK_LIBCPLUSPLUS} != "no" _startup_libs+= lib/libcxxrt .endif .if ${MK_LLVM_LIBUNWIND} != "no" _prereq_libs+= lib/libgcc_eh lib/libgcc_s _startup_libs+= lib/libgcc_eh lib/libgcc_s lib/libgcc_s__L: lib/libc__L lib/libgcc_s__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: lib/libgcc_s__L .endif .else # MK_LLVM_LIBUNWIND == no _prereq_libs+= gnu/lib/libgcc _startup_libs+= gnu/lib/libgcc gnu/lib/libgcc__L: lib/libc__L gnu/lib/libgcc__L: lib/libc_nonshared__L .if ${MK_LIBCPLUSPLUS} != "no" lib/libcxxrt__L: gnu/lib/libgcc__L .endif .endif _prebuild_libs= ${_kerberos5_lib_libasn1} \ ${_kerberos5_lib_libhdb} \ ${_kerberos5_lib_libheimbase} \ ${_kerberos5_lib_libheimntlm} \ ${_libsqlite3} \ ${_kerberos5_lib_libheimipcc} \ ${_kerberos5_lib_libhx509} ${_kerberos5_lib_libkrb5} \ ${_kerberos5_lib_libroken} \ ${_kerberos5_lib_libwind} \ lib/libbz2 ${_libcom_err} lib/libcrypt \ lib/libelf lib/libexpat \ lib/libfigpar \ ${_lib_libgssapi} \ lib/libkiconv lib/libkvm lib/liblzma lib/libmd lib/libnv \ ${_lib_casper} \ lib/ncurses/ncurses lib/ncurses/ncursesw \ lib/libopie lib/libpam/libpam ${_lib_libthr} \ ${_lib_libradius} lib/libsbuf lib/libtacplus \ lib/libgeom \ ${_cddl_lib_libumem} ${_cddl_lib_libnvpair} \ ${_cddl_lib_libuutil} \ ${_cddl_lib_libavl} \ ${_cddl_lib_libzfs_core} \ ${_cddl_lib_libctf} \ lib/libutil lib/libpjdlog ${_lib_libypclnt} lib/libz lib/msun \ ${_secure_lib_libcrypto} ${_lib_libldns} \ ${_secure_lib_libssh} ${_secure_lib_libssl} .if ${MK_GNUCXX} != "no" _prebuild_libs+= gnu/lib/libstdc++ gnu/lib/libsupc++ gnu/lib/libstdc++__L: lib/msun__L gnu/lib/libsupc++__L: gnu/lib/libstdc++__L .endif .if ${MK_DIALOG} != "no" _prebuild_libs+= gnu/lib/libdialog gnu/lib/libdialog__L: lib/msun__L lib/ncurses/ncursesw__L .endif .if ${MK_LIBCPLUSPLUS} != "no" _prebuild_libs+= lib/libc++ .endif lib/libgeom__L: lib/libexpat__L lib/libkvm__L: lib/libelf__L .if ${MK_LIBTHR} != "no" _lib_libthr= lib/libthr .endif .if ${MK_RADIUS_SUPPORT} != "no" _lib_libradius= lib/libradius .endif .if ${MK_OFED} != "no" _ofed_lib= contrib/ofed/usr.lib _prebuild_libs+= contrib/ofed/usr.lib/libosmcomp _prebuild_libs+= contrib/ofed/usr.lib/libopensm _prebuild_libs+= contrib/ofed/usr.lib/libibcommon _prebuild_libs+= contrib/ofed/usr.lib/libibverbs _prebuild_libs+= contrib/ofed/usr.lib/libibumad contrib/ofed/usr.lib/libopensm__L: lib/libthr__L contrib/ofed/usr.lib/libosmcomp__L: lib/libthr__L contrib/ofed/usr.lib/libibumad__L: 
contrib/ofed/usr.lib/libibcommon__L .endif .if ${MK_CASPER} != "no" _lib_casper= lib/libcasper .endif lib/libpjdlog__L: lib/libutil__L lib/libcasper__L: lib/libnv__L lib/liblzma__L: lib/libthr__L _generic_libs= ${_cddl_lib} gnu/lib ${_kerberos5_lib} lib ${_secure_lib} usr.bin/lex/lib ${_ofed_lib} .for _DIR in ${LOCAL_LIB_DIRS} .if exists(${.CURDIR}/${_DIR}/Makefile) && empty(_generic_libs:M${_DIR}) _generic_libs+= ${_DIR} .endif .endfor lib/libopie__L lib/libtacplus__L: lib/libmd__L .if ${MK_CDDL} != "no" _cddl_lib_libumem= cddl/lib/libumem _cddl_lib_libnvpair= cddl/lib/libnvpair _cddl_lib_libavl= cddl/lib/libavl _cddl_lib_libuutil= cddl/lib/libuutil _cddl_lib_libzfs_core= cddl/lib/libzfs_core _cddl_lib_libctf= cddl/lib/libctf _cddl_lib= cddl/lib cddl/lib/libzfs_core__L: cddl/lib/libnvpair__L cddl/lib/libzfs__L: lib/libgeom__L cddl/lib/libctf__L: lib/libz__L .endif # cddl/lib/libdtrace requires lib/libproc and lib/librtld_db; it's only built # on select architectures though (see cddl/lib/Makefile) .if ${MACHINE_CPUARCH} != "sparc64" _prebuild_libs+= lib/libprocstat lib/libproc lib/librtld_db lib/libprocstat__L: lib/libelf__L lib/libkvm__L lib/libutil__L lib/libproc__L: lib/libprocstat__L lib/librtld_db__L: lib/libprocstat__L .endif .if ${MK_CRYPT} != "no" .if ${MK_OPENSSL} != "no" _secure_lib_libcrypto= secure/lib/libcrypto _secure_lib_libssl= secure/lib/libssl lib/libradius__L secure/lib/libssl__L: secure/lib/libcrypto__L .if ${MK_LDNS} != "no" _lib_libldns= lib/libldns lib/libldns__L: secure/lib/libcrypto__L .endif .if ${MK_OPENSSH} != "no" _secure_lib_libssh= secure/lib/libssh secure/lib/libssh__L: lib/libz__L secure/lib/libcrypto__L lib/libcrypt__L .if ${MK_LDNS} != "no" secure/lib/libssh__L: lib/libldns__L .endif .if ${MK_GSSAPI} != "no" && ${MK_KERBEROS_SUPPORT} != "no" secure/lib/libssh__L: lib/libgssapi__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libhx509__L kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libmd__L kerberos5/lib/libroken__L .endif .endif .endif _secure_lib= secure/lib .endif .if ${MK_KERBEROS} != "no" kerberos5/lib/libasn1__L: lib/libcom_err__L kerberos5/lib/libroken__L kerberos5/lib/libhdb__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ kerberos5/lib/libkrb5__L kerberos5/lib/libroken__L \ kerberos5/lib/libwind__L lib/libsqlite3__L kerberos5/lib/libheimntlm__L: secure/lib/libcrypto__L kerberos5/lib/libkrb5__L \ kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libhx509__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ secure/lib/libcrypto__L kerberos5/lib/libroken__L kerberos5/lib/libwind__L kerberos5/lib/libkrb5__L: kerberos5/lib/libasn1__L lib/libcom_err__L \ lib/libcrypt__L secure/lib/libcrypto__L kerberos5/lib/libhx509__L \ kerberos5/lib/libroken__L kerberos5/lib/libwind__L \ kerberos5/lib/libheimbase__L kerberos5/lib/libheimipcc__L kerberos5/lib/libroken__L: lib/libcrypt__L kerberos5/lib/libwind__L: kerberos5/lib/libroken__L lib/libcom_err__L kerberos5/lib/libheimbase__L: lib/libthr__L kerberos5/lib/libheimipcc__L: kerberos5/lib/libroken__L kerberos5/lib/libheimbase__L lib/libthr__L .endif lib/libsqlite3__L: lib/libthr__L .if ${MK_GSSAPI} != "no" _lib_libgssapi= lib/libgssapi .endif .if ${MK_KERBEROS} != "no" _kerberos5_lib= kerberos5/lib _kerberos5_lib_libasn1= kerberos5/lib/libasn1 _kerberos5_lib_libhdb= kerberos5/lib/libhdb _kerberos5_lib_libheimbase= kerberos5/lib/libheimbase _kerberos5_lib_libkrb5= kerberos5/lib/libkrb5 _kerberos5_lib_libhx509= kerberos5/lib/libhx509 _kerberos5_lib_libroken= kerberos5/lib/libroken _kerberos5_lib_libheimntlm= 
kerberos5/lib/libheimntlm _libsqlite3= lib/libsqlite3 _kerberos5_lib_libheimipcc= kerberos5/lib/libheimipcc _kerberos5_lib_libwind= kerberos5/lib/libwind _libcom_err= lib/libcom_err .endif .if ${MK_NIS} != "no" _lib_libypclnt= lib/libypclnt .endif .if ${MK_OPENSSL} == "no" lib/libradius__L: lib/libmd__L .endif lib/libproc__L: \ ${_cddl_lib_libctf:D${_cddl_lib_libctf}__L} lib/libelf__L lib/librtld_db__L lib/libutil__L .if ${MK_CXX} != "no" .if ${MK_LIBCPLUSPLUS} != "no" lib/libproc__L: lib/libcxxrt__L .else # This implies MK_GNUCXX != "no"; see lib/libproc lib/libproc__L: gnu/lib/libsupc++__L .endif .endif .for _lib in ${_prereq_libs} ${_lib}__PL: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ if [ -z "${NO_OBJ}" ]; then ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; fi; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no MK_PROFILE=no -DNO_PIC \ DIRPRFX=${_lib}/ install .endif .endfor .for _lib in ${_startup_libs} ${_prebuild_libs} ${_generic_libs} ${_lib}__L: .PHONY .MAKE .if exists(${.CURDIR}/${_lib}) ${_+_}@${ECHODIR} "===> ${_lib} (obj,all,install)"; \ cd ${.CURDIR}/${_lib}; \ if [ -z "${NO_OBJ}" ]; then ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ obj; fi; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ all; \ ${MAKE} MK_TESTS=no DIRPRFX=${_lib}/ install .endif .endfor _prereq_libs: ${_prereq_libs:S/$/__PL/} _startup_libs: ${_startup_libs:S/$/__L/} _prebuild_libs: ${_prebuild_libs:S/$/__L/} _generic_libs: ${_generic_libs:S/$/__L/} # Enable SUBDIR_PARALLEL when not calling 'make all', unless called from # 'everything' with _PARALLEL_SUBDIR_OK set. This is because it is unlikely # that running 'make all' from the top-level, especially with a SUBDIR_OVERRIDE # or LOCAL_DIRS set, will have a reliable build if SUBDIRs are built in # parallel. This is safe for the world stage of buildworld though since it has # already built libraries in a proper order and installed includes into # WORLDTMP. Special handling is done for SUBDIR ordering for 'install*' to # avoid trashing a system if it crashes mid-install. .if !make(all) || defined(_PARALLEL_SUBDIR_OK) SUBDIR_PARALLEL= .endif .include .if make(check-old) || make(check-old-dirs) || \ make(check-old-files) || make(check-old-libs) || \ make(delete-old) || make(delete-old-dirs) || \ make(delete-old-files) || make(delete-old-libs) # # check for / delete old files section # .include "ObsoleteFiles.inc" OLD_LIBS_MESSAGE="Please be sure no application still uses those libraries, \ else you can not start such an application. Consult UPDATING for more \ information regarding how to cope with the removal/revision bump of a \ specific library." .if !defined(BATCH_DELETE_OLD_FILES) RM_I=-i .else RM_I=-v .endif delete-old-files: .PHONY @echo ">>> Removing old files (only deletes safe to delete libs)" # Ask for every old file if the user really wants to remove it. # It's annoying, but better safe than sorry. # NB: We cannot pass the list of OLD_FILES as a parameter because the # argument list will get too long. Using .for/.endfor make "loops" will make # the Makefile parser segfault. @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! 
[ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done # Remove catpages without corresponding manpages. @exec 3<&0; \ find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ rm ${RM_I} $${catpage} <&3; \ fi; \ done @echo ">>> Old files removed" check-old-files: .PHONY @echo ">>> Checking for old files" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_FILES -V "OLD_FILES:Musr/share/*.gz:R" | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done # Check for catpages without corresponding manpages. @find ${DESTDIR}/usr/share/man/cat* ! -type d | \ sed -ep -e's:${DESTDIR}/usr/share/man/cat:${DESTDIR}/usr/share/man/man:' | \ while read catpage; do \ read manpage; \ if [ ! -e "$${manpage}" ]; then \ echo $${catpage}; \ fi; \ done delete-old-libs: .PHONY @echo ">>> Removing old libraries" @echo "${OLD_LIBS_MESSAGE}" | fmt @exec 3<&0; \ cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ chflags noschg "${DESTDIR}/$${file}" 2>/dev/null || true; \ rm ${RM_I} "${DESTDIR}/$${file}" <&3; \ fi; \ for ext in debug symbols; do \ if ! [ -e "${DESTDIR}/$${file}" ] && [ -f \ "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ rm ${RM_I} "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" \ <&3; \ fi; \ done; \ done @echo ">>> Old libraries removed" check-old-libs: .PHONY @echo ">>> Checking for old libraries" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_LIBS | xargs -n1 | \ while read file; do \ if [ -f "${DESTDIR}/$${file}" -o -L "${DESTDIR}/$${file}" ]; then \ echo "${DESTDIR}/$${file}"; \ fi; \ for ext in debug symbols; do \ if [ -f "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${file}.$${ext}"; \ fi; \ done; \ done delete-old-dirs: .PHONY @echo ">>> Removing old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | sort -r | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ if [ -d "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ rmdir -v "${DESTDIR}${DEBUGDIR}/$${dir}" || true; \ elif [ -L "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done @echo ">>> Old directories removed" check-old-dirs: .PHONY @echo ">>> Checking for old directories" @cd ${.CURDIR}; \ ${MAKE} -f ${.CURDIR}/Makefile.inc1 ${.MAKEFLAGS} ${.TARGET} \ -V OLD_DIRS | xargs -n1 | \ while read dir; do \ if [ -d "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir}"; \ elif [ -L "${DESTDIR}/$${dir}" ]; then \ echo "${DESTDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ if [ -d "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir}"; \ elif [ 
-L "${DESTDIR}${DEBUGDIR}/$${dir}" ]; then \ echo "${DESTDIR}${DEBUGDIR}/$${dir} is a link, please remove everything manually."; \ fi; \ done delete-old: delete-old-files delete-old-dirs .PHONY @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." check-old: check-old-files check-old-libs check-old-dirs .PHONY @echo "To remove old files and directories run '${MAKE_CMD} delete-old'." @echo "To remove old libraries run '${MAKE_CMD} delete-old-libs'." .endif # # showconfig - show build configuration. # showconfig: .PHONY @(${MAKE} -n -f ${.CURDIR}/sys/conf/kern.opts.mk -V dummy -dg1; \ ${MAKE} -n -f ${.CURDIR}/share/mk/src.opts.mk -V dummy -dg1) 2>&1 | grep ^MK_ | sort -u .if !empty(KRNLOBJDIR) && !empty(KERNCONF) DTBOUTPUTPATH= ${KRNLOBJDIR}/${KERNCONF}/ .if !defined(FDT_DTS_FILE) || empty(FDT_DTS_FILE) .if exists(${KERNCONFDIR}/${KERNCONF}) FDT_DTS_FILE!= awk 'BEGIN {FS="="} /^makeoptions[[:space:]]+FDT_DTS_FILE/ {print $$2}' \ '${KERNCONFDIR}/${KERNCONF}' ; echo .endif .endif .endif .if !defined(DTBOUTPUTPATH) || !exists(${DTBOUTPUTPATH}) DTBOUTPUTPATH= ${.CURDIR} .endif # # Build 'standalone' Device Tree Blob # builddtb: .PHONY @PATH=${TMPPATH} MACHINE=${TARGET} \ ${.CURDIR}/sys/tools/fdt/make_dtb.sh ${.CURDIR}/sys \ "${FDT_DTS_FILE}" ${DTBOUTPUTPATH} ############### # cleanworld # In the following, the first 'rm' in a series will usually remove all # files and directories. If it does not, then there are probably some # files with file flags set, so this unsets them and tries the 'rm' a # second time. There are situations where this target will be cleaning # some directories via more than one method, but that duplication is # needed to correctly handle all the possible situations. Removing all # files without file flags set in the first 'rm' instance saves time, # because 'chflags' will need to operate on fewer files afterwards. # # It is expected that BW_CANONICALOBJDIR == the CANONICALOBJDIR as would be # created by bsd.obj.mk, except that we don't want to .include that file # in this makefile. # BW_CANONICALOBJDIR:=${OBJTREE}${.CURDIR} cleanworld: .PHONY .if exists(${BW_CANONICALOBJDIR}/) -rm -rf ${BW_CANONICALOBJDIR}/* -chflags -R 0 ${BW_CANONICALOBJDIR} rm -rf ${BW_CANONICALOBJDIR}/* .endif .if ${.CURDIR} == ${.OBJDIR} || ${.CURDIR}/obj == ${.OBJDIR} # To be safe in this case, fall back to a 'make cleandir' ${_+_}@cd ${.CURDIR}; ${MAKE} cleandir .endif .if defined(TARGET) && defined(TARGET_ARCH) .if ${TARGET} == ${MACHINE} && ${TARGET_ARCH} == ${MACHINE_ARCH} XDEV_CPUTYPE?=${CPUTYPE} .else XDEV_CPUTYPE?=${TARGET_CPUTYPE} .endif NOFUN=-DNO_FSCHG MK_HTML=no -DNO_LINT \ MK_MAN=no MK_NLS=no MK_PROFILE=no \ MK_KERBEROS=no MK_RESCUE=no MK_TESTS=no MK_WARNS=no \ TARGET=${TARGET} TARGET_ARCH=${TARGET_ARCH} \ CPUTYPE=${XDEV_CPUTYPE} XDDIR=${TARGET_ARCH}-freebsd XDTP?=/usr/${XDDIR} .if ${XDTP:N/*} .error XDTP variable should be an absolute path .endif CDBENV=MAKEOBJDIRPREFIX=${MAKEOBJDIRPREFIX}/${XDDIR} \ INSTALL="sh ${.CURDIR}/tools/install.sh" CDENV= ${CDBENV} \ TOOLS_PREFIX=${XDTP} .if ${WANT_COMPILER_TYPE} == gcc || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc) # GCC requires -isystem and -L when using a cross-compiler. --sysroot # won't set header path and -L is used to ensure the base library path # is added before the port PREFIX library path. CD2CFLAGS+= -isystem ${XDDESTDIR}/usr/include -L${XDDESTDIR}/usr/lib # GCC requires -B to find /usr/lib/crti.o when using a cross-compiler # combined with --sysroot. 
CD2CFLAGS+= -B${XDDESTDIR}/usr/lib # Force using libc++ for external GCC. # XXX: This should be checking MK_GNUCXX == no .if ${X_COMPILER_VERSION} >= 40800 CD2CXXFLAGS+= -isystem ${XDDESTDIR}/usr/include/c++/v1 -std=c++11 \ -nostdinc++ .endif .endif CD2CFLAGS+= --sysroot=${XDDESTDIR}/ CD2ENV=${CDENV} CC="${CC} ${CD2CFLAGS}" CXX="${CXX} ${CD2CXXFLAGS} ${CD2CFLAGS}" \ CPP="${CPP} ${CD2CFLAGS}" \ MACHINE=${TARGET} MACHINE_ARCH=${TARGET_ARCH} CDTMP= ${MAKEOBJDIRPREFIX}/${XDDIR}/${.CURDIR}/tmp CDMAKE=${CDENV} PATH=${CDTMP}/usr/bin:${PATH} ${MAKE} ${NOFUN} CD2MAKE=${CD2ENV} PATH=${CDTMP}/usr/bin:${XDDESTDIR}/usr/bin:${PATH} ${MAKE} ${NOFUN} .if ${MK_META_MODE} != "no" # Don't rebuild build-tools targets during normal build. CD2MAKE+= BUILD_TOOLS_META=.NOMETA .endif XDDESTDIR=${DESTDIR}/${XDTP} .if !defined(OSREL) OSREL!= uname -r | sed -e 's/[-(].*//' .endif .ORDER: xdev-build xdev-install xdev-links xdev: xdev-build xdev-install .PHONY .ORDER: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools xdev-build: _xb-worldtmp _xb-bootstrap-tools _xb-build-tools _xb-cross-tools .PHONY _xb-worldtmp: .PHONY mkdir -p ${CDTMP}/usr mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${CDTMP}/usr >/dev/null _xb-bootstrap-tools: .PHONY .for _tool in \ ${_clang_tblgen} \ ${_gperf} \ ${_yacc} ${_+_}@${ECHODIR} "===> ${_tool} (obj,all,install)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJ}" ]; then ${CDMAKE} DIRPRFX=${_tool}/ obj; fi; \ ${CDMAKE} DIRPRFX=${_tool}/ all; \ ${CDMAKE} DIRPRFX=${_tool}/ DESTDIR=${CDTMP} install .endfor _xb-build-tools: .PHONY ${_+_}@cd ${.CURDIR}; \ ${CDBENV} ${MAKE} -f Makefile.inc1 ${NOFUN} build-tools _xb-cross-tools: .PHONY .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_gcc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (obj,all)"; \ cd ${.CURDIR}/${_tool}; \ if [ -z "${NO_OBJ}" ]; then ${CDMAKE} DIRPRFX=${_tool}/ obj; fi; \ ${CDMAKE} DIRPRFX=${_tool}/ all .endfor _xi-mtree: .PHONY ${_+_}@${ECHODIR} "mtree populating ${XDDESTDIR}" mkdir -p ${XDDESTDIR} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.root.dist \ -p ${XDDESTDIR} >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.usr.dist \ -p ${XDDESTDIR}/usr >/dev/null mtree -deU -f ${.CURDIR}/etc/mtree/BSD.include.dist \ -p ${XDDESTDIR}/usr/include >/dev/null .if defined(LIBCOMPAT) mtree -deU -f ${.CURDIR}/etc/mtree/BSD.lib${libcompat}.dist \ -p ${XDDESTDIR}/usr >/dev/null .endif .if ${MK_TESTS} != "no" mkdir -p ${XDDESTDIR}${TESTSBASE} mtree -deU -f ${.CURDIR}/etc/mtree/BSD.tests.dist \ -p ${XDDESTDIR}${TESTSBASE} >/dev/null .endif .ORDER: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries xdev-install: xdev-build _xi-mtree _xi-cross-tools _xi-includes _xi-libraries .PHONY _xi-cross-tools: .PHONY @echo "_xi-cross-tools" .for _tool in \ ${_binutils} \ ${_elftctools} \ usr.bin/ar \ ${_clang_libs} \ ${_clang} \ ${_gcc} ${_+_}@${ECHODIR} "===> xdev ${_tool} (install)"; \ cd ${.CURDIR}/${_tool}; \ ${CDMAKE} DIRPRFX=${_tool}/ install DESTDIR=${XDDESTDIR} .endfor _xi-includes: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 includes \ DESTDIR=${XDDESTDIR} _xi-libraries: .PHONY ${_+_}cd ${.CURDIR}; ${CD2MAKE} -f Makefile.inc1 libraries \ DESTDIR=${XDDESTDIR} xdev-links: .PHONY ${_+_}cd ${XDDESTDIR}/usr/bin; \ mkdir -p ../../../../usr/bin; \ for i in *; do \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}-$$i; \ ln -sf ../../${XDTP}/usr/bin/$$i \ ../../../../usr/bin/${XDDIR}${OSREL}-$$i; \ done .else xdev xdev-build xdev-install xdev-links: .PHONY @echo "*** 
Error: Both TARGET and TARGET_ARCH must be defined for \"${.TARGET}\" target" .endif Index: projects/clang500-import/share/man/man5/src.conf.5 =================================================================== --- projects/clang500-import/share/man/man5/src.conf.5 (revision 319250) +++ projects/clang500-import/share/man/man5/src.conf.5 (revision 319251) @@ -1,1590 +1,1584 @@ .\" DO NOT EDIT-- this file is generated by tools/build/options/makeman. .\" $FreeBSD$ -.Dd April 21, 2017 +.Dd May 30, 2017 .Dt SRC.CONF 5 .Os .Sh NAME .Nm src.conf .Nd "source build options" .Sh DESCRIPTION The .Nm file contains settings that will apply to every build involving the .Fx source tree; see .Xr build 7 . .Pp The .Nm file uses the standard makefile syntax. However, .Nm should not specify any dependencies to .Xr make 1 . Instead, .Nm is to set .Xr make 1 variables that control the aspects of how the system builds. .Pp The default location of .Nm is .Pa /etc/src.conf , though an alternative location can be specified in the .Xr make 1 variable .Va SRCCONF . Overriding the location of .Nm may be necessary if the system-wide settings are not suitable for a particular build. For instance, setting .Va SRCCONF to .Pa /dev/null effectively resets all build controls to their defaults. .Pp The only purpose of .Nm is to control the compilation of the .Fx source code, which is usually located in .Pa /usr/src . As a rule, the system administrator creates .Nm when the values of certain control variables need to be changed from their defaults. .Pp In addition, control variables can be specified for a particular build via the .Fl D option of .Xr make 1 or in its environment; see .Xr environ 7 . .Pp The environment of .Xr make 1 for the build can be controlled via the .Va SRC_ENV_CONF variable, which defaults to .Pa /etc/src-env.conf . Some examples that may only be set in this file are .Va WITH_DIRDEPS_BUILD , and .Va WITH_META_MODE as they are environment-only variables. Note that .Va MAKEOBJDIRPREFIX may be set here only when using .Va WITH_DIRDEPS_BUILD . .Pp The values of variables are ignored regardless of their setting; even if they would be set to .Dq Li FALSE or .Dq Li NO . The presence of an option causes it to be honored by .Xr make 1 . .Pp This list provides a name and short description for variables that can be used for source builds. .Bl -tag -width indent .It Va WITHOUT_ACCT Set to not build process accounting tools such as .Xr accton 8 and .Xr sa 8 . .It Va WITHOUT_ACPI Set to not build .Xr acpiconf 8 , .Xr acpidump 8 and related programs. .It Va WITHOUT_AMD Set to not build .Xr amd 8 , and related programs. .It Va WITHOUT_APM Set to not build .Xr apm 8 , .Xr apmd 8 and related programs. .It Va WITHOUT_ASSERT_DEBUG Set to compile programs and libraries without the .Xr assert 3 checks. .It Va WITHOUT_AT Set to not build .Xr at 1 and related utilities. .It Va WITHOUT_ATM Set to not build programs and libraries related to ATM networking. .It Va WITHOUT_AUDIT Set to not build audit support into system programs. .It Va WITHOUT_AUTHPF Set to not build .Xr authpf 8 . .It Va WITHOUT_AUTOFS Set to not build .Xr autofs 5 related programs, libraries, and kernel modules. .It Va WITH_AUTO_OBJ Enable automatic creation of objdirs. .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_BHYVE Set to not build or install .Xr bhyve 8 , associated utilities, and examples. .Pp This option only affects amd64/amd64. 
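.Pp
For example (an illustrative
.Pa /etc/src.conf
entry; the assigned value is ignored, only the presence of the variable
matters):
.Bd -literal -offset indent
WITHOUT_BHYVE=yes
.Ed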
.It Va WITHOUT_BINUTILS Set to not build or install binutils (as, ld, objcopy, and objdump) as part of the normal system build. The resulting system cannot build programs from source. .Pp This is a default setting on arm64/aarch64, riscv/riscv64 and riscv/riscv64sf. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_GDB .El .It Va WITH_BINUTILS Set to build and install binutils (as, ld, objcopy, and objdump) as part of the normal system build. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_BINUTILS_BOOTSTRAP Set to not build binutils (as, ld, objcopy and objdump) as part of the bootstrap process. .Bf -symbolic The option does not work for build targets unless some alternative toolchain is provided. .Ef .Pp This is a default setting on arm64/aarch64, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_BINUTILS_BOOTSTRAP Set to build binutils (as, ld, objcopy and objdump) as part of the bootstrap process. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_BLACKLIST -Set this if you do not want to build blacklistd/blacklistctl. +Set this if you do not want to build +.Xr blacklistd 8 +and +.Xr blacklistctl 8 . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BLACKLIST_SUPPORT .El .It Va WITHOUT_BLACKLIST_SUPPORT -Set to build some programs without blacklistd support, like +Set to build some programs without +.Xr libblacklist 3 +support, like .Xr fingerd 8 , .Xr ftpd 8 , .Xr rlogind 8 , .Xr rshd 8 , and .Xr sshd 8 . .It Va WITHOUT_BLUETOOTH Set to not build Bluetooth related kernel modules, programs and libraries. .It Va WITHOUT_BOOT Set to not build the boot blocks and loader. .It Va WITHOUT_BOOTPARAMD Set to not build or install .Xr bootparamd 8 . .It Va WITHOUT_BOOTPD Set to not build or install .Xr bootpd 8 . .It Va WITHOUT_BSDINSTALL Set to not build .Xr bsdinstall 8 , .Xr sade 8 , and related programs. .It Va WITHOUT_BSD_CPIO Set to not build the BSD-licensed version of cpio based on .Xr libarchive 3 . .It Va WITH_BSD_GREP Install BSD-licensed grep as '[ef]grep' instead of GNU grep. .It Va WITHOUT_BSD_GREP_FASTMATCH Set this option to exclude the fastmatch implementation from .Xr bsdgrep 1 , instead using only .Xr regex 3 . .It Va WITHOUT_BSNMP Set to not build or install .Xr bsnmpd 1 and related libraries and data files. .It Va WITHOUT_BZIP2 Set to not build contributed bzip2 software as a part of the base system. .Bf -symbolic The option has no effect yet. .Ef When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BZIP2_SUPPORT .El .It Va WITHOUT_BZIP2_SUPPORT Set to build some programs without optional bzip2 support. .It Va WITHOUT_CALENDAR Set to not build .Xr calendar 1 . .It Va WITHOUT_CAPSICUM Set to not build Capsicum support into system programs. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CASPER .El .It Va WITHOUT_CASPER Set to not build the Casper program and related libraries. .It Va WITH_CCACHE_BUILD Set to use .Xr ccache 1 for the build.
No configuration is required except to install the .Sy devel/ccache package. When using with .Xr distcc 1 , set .Sy CCACHE_PREFIX=/usr/local/bin/distcc . The default cache directory of .Pa $HOME/.ccache will be used, which can be overridden by setting .Sy CCACHE_DIR . The .Sy CCACHE_COMPILERCHECK option defaults to .Sy content when using the in-tree bootstrap compiler, and .Sy mtime when using an external compiler. The .Sy CCACHE_CPP2 option is used for Clang but not GCC. .Pp Sharing a cache between multiple work directories requires using a layout similar to .Pa /some/prefix/src .Pa /some/prefix/obj and an environment such as: .Bd -literal -offset indent CCACHE_BASEDIR='${SRCTOP:H}' MAKEOBJDIRPREFIX='${SRCTOP:H}/obj' .Ed .Pp See .Xr ccache 1 for more configuration options. .It Va WITHOUT_CCD Set to not build .Xr geom_ccd 4 and related utilities. .It Va WITHOUT_CDDL Set to not build code licensed under Sun's CDDL. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CTF .It .Va WITHOUT_ZFS .El .It Va WITHOUT_CLANG Set to not build the Clang C/C++ compiler during the regular phase of the build. .Pp This is a default setting on riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .El .It Va WITH_CLANG Set to build the Clang C/C++ compiler during the normal phase of the build. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_CLANG_BOOTSTRAP Set to not build the Clang C/C++ compiler during the bootstrap phase of the build. To be able to build the system, either gcc or clang bootstrap must be enabled unless an alternate compiler is provided via XCC. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_CLANG_BOOTSTRAP Set to build the Clang C/C++ compiler during the bootstrap phase of the build. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64 and i386/i386. .It Va WITH_CLANG_EXTRAS Set to build additional clang and llvm tools, such as bugpoint. .It Va WITHOUT_CLANG_FULL Set to avoid building the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. .Pp This is a default setting on riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_CLANG_FULL Set to build the ARCMigrate, Rewriter and StaticAnalyzer components of the Clang C/C++ compiler. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpcspe. .It Va WITHOUT_CLANG_IS_CC Set to install the GCC compiler as .Pa /usr/bin/cc , .Pa /usr/bin/c++ and .Pa /usr/bin/cpp . .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. 
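.Pp
For example, a hypothetical
.Pa /etc/src.conf
that installs GCC rather than Clang as
.Pa /usr/bin/cc
could combine:
.Bd -literal -offset indent
WITHOUT_CLANG_IS_CC=
WITH_GCC=
WITH_GCC_BOOTSTRAP=
.Ed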
.It Va WITH_CLANG_IS_CC Set to install the Clang C/C++ compiler as .Pa /usr/bin/cc , .Pa /usr/bin/c++ and .Pa /usr/bin/cpp . .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64 and i386/i386. .It Va WITHOUT_CPP Set to not build .Xr cpp 1 . .It Va WITHOUT_CROSS_COMPILER Set to not build any cross compiler in the cross-tools stage of buildworld. When compiling a different version of .Fx than what is installed on the system, provide an alternate compiler with XCC to ensure success. When compiling with an identical version of .Fx to the host, this option may be safely used. This option may also be safe when the host version of .Fx is close to the sources being built, but all bets are off if there have been any changes to the toolchain between the versions. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BINUTILS_BOOTSTRAP .It .Va WITHOUT_CLANG_BOOTSTRAP .It .Va WITHOUT_ELFTOOLCHAIN_BOOTSTRAP .It .Va WITHOUT_GCC_BOOTSTRAP .El .It Va WITHOUT_CRYPT Set to not build any crypto code. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS .It .Va WITHOUT_KERBEROS_SUPPORT .It .Va WITHOUT_OPENSSH .It .Va WITHOUT_OPENSSL .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITH_CTF Set to compile with CTF (Compact C Type Format) data. CTF data encapsulates a reduced form of debugging information similar to DWARF and the venerable stabs and is required for DTrace. .It Va WITHOUT_CTM Set to not build .Xr ctm 1 and related utilities. .It Va WITHOUT_CUSE Set to not build CUSE-related programs and libraries. .It Va WITHOUT_CXGBETOOL Set to not build .Xr cxgbetool 8 .Pp This is a default setting on arm/arm, arm/armeb, arm/armv6, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpcspe, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_CXGBETOOL Set to build .Xr cxgbetool 8 .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc64 and sparc64/sparc64. .It Va WITHOUT_CXX Set to not build .Xr c++ 1 and related libraries. It will also prevent building of .Xr gperf 1 and .Xr devd 8 . .Pp This is a default setting on riscv/riscv64 and riscv/riscv64sf. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_CLANG .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_DTRACE_TESTS .It .Va WITHOUT_GNUCXX .It .Va WITHOUT_GROFF .It .Va WITHOUT_TESTS .It .Va WITHOUT_TESTS_SUPPORT .El .It Va WITH_CXX Set to build .Xr c++ 1 and related libraries. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_DEBUG_FILES Set to avoid building or installing standalone debug files for each executable binary and shared library. .It Va WITHOUT_DIALOG Set to not build .Xr dialog 1 , .Xr dialog 3 , .Xr dpv 1 , and .Xr dpv 3 . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BSDINSTALL .El .It Va WITHOUT_DICT Set to not build the Webster dictionary files. .It Va WITH_DIRDEPS_BUILD This is an experimental build system. For details see http://www.crufty.net/sjg/docs/freebsd-meta-mode.htm. 
Build commands can be seen from the top-level with: .Dl make show-valid-targets The build is driven by dirdeps.mk using .Va DIRDEPS stored in Makefile.depend files found in each directory. .Pp The build can be started from anywhere, and behaves the same. The initial instance of .Xr make 1 recursively reads .Va DIRDEPS from .Pa Makefile.depend , computing a graph of tree dependencies from the current origin. Setting .Va NO_DIRDEPS skips checking dirdep dependencies and will only build in the current and child directories. .Va NO_DIRDEPS_BELOW skips building any dirdeps and only builds the current directory. .Pp This also utilizes the .Va WITH_META_MODE logic for incremental builds. .Pp The build hides commands executed unless .Va NO_SILENT is defined. .Pp Note that there is currently no mass install feature for this. .Pp When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITH_INSTALL_AS_USER .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITH_AUTO_OBJ (unless .Va WITHOUT_AUTO_OBJ is set explicitly) .It Va WITH_META_MODE (unless .Va WITHOUT_META_MODE is set explicitly) .It Va WITH_STAGING (unless .Va WITHOUT_STAGING is set explicitly) .It Va WITH_STAGING_MAN (unless .Va WITHOUT_STAGING_MAN is set explicitly) .It Va WITH_STAGING_PROG (unless .Va WITHOUT_STAGING_PROG is set explicitly) .It Va WITH_SYSROOT (unless .Va WITHOUT_SYSROOT is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_DIRDEPS_CACHE Cache the result of dirdeps.mk, which can save significant time for subsequent builds. Depends on .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_DMAGENT Set to not build the dma Mail Transport Agent. .It Va WITHOUT_DOCCOMPRESS Set to not install compressed system documentation. Only the uncompressed version will be installed. .It Va WITH_DTRACE_TESTS Set to build and install the DTrace test suite in .Pa /usr/tests/cddl/usr.sbin/dtrace . This test suite is considered experimental on architectures other than amd64/amd64 and running it may cause system instability. .It Va WITHOUT_DYNAMICROOT Set this if you do not want to link .Pa /bin and .Pa /sbin dynamically. .It Va WITHOUT_ED_CRYPTO Set to build .Xr ed 1 without support for encryption/decryption. .It Va WITHOUT_EE Set to not build and install .Xr edit 1 , .Xr ee 1 , and related programs. .It Va WITHOUT_EFI Set to not build .Xr efivar 3 and .Xr efivar 8 . .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_EFI Set to build .Xr efivar 3 and .Xr efivar 8 . .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64 and i386/i386. .It Va WITHOUT_ELFTOOLCHAIN_BOOTSTRAP Set to not build ELF Tool Chain tools (addr2line, nm, size, strings and strip) as part of the bootstrap process. .Bf -symbolic An alternate bootstrap tool chain must be provided. .Ef .It Va WITHOUT_EXAMPLES Set to avoid installing examples to .Pa /usr/share/examples/ . .It Va WITH_EXTRA_TCP_STACKS Set to build extra TCP stack modules. .It Va WITHOUT_FDT Set to not build Flattened Device Tree support as part of the base system. This includes the device tree compiler (dtc) and the libfdt support library.
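.Pp
For example (illustrative), a build for hardware that is configured without
device trees could set:
.Bd -literal -offset indent
WITHOUT_FDT=
.Ed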
.It Va WITHOUT_FILE Set to not build .Xr file 1 and related programs. .It Va WITHOUT_FINGER Set to not build or install .Xr finger 1 and .Xr fingerd 8 . .It Va WITHOUT_FLOPPY Set to not build or install programs for operating the floppy disk driver. .It Va WITHOUT_FMTREE Set to not build and install .Pa /usr/sbin/fmtree . .It Va WITHOUT_FORMAT_EXTENSIONS Set to not enable .Fl fformat-extensions when compiling the kernel. Also disables all format checking. .It Va WITHOUT_FORTH Set to build bootloaders without Forth support. .It Va WITHOUT_FP_LIBC Set to build .Nm libc without floating-point support. .It Va WITHOUT_FREEBSD_UPDATE Set to not build .Xr freebsd-update 8 . .It Va WITHOUT_FTP Set to not build or install .Xr ftp 1 and .Xr ftpd 8 . .It Va WITHOUT_GAMES Set to not build games. .It Va WITHOUT_GCC Set to not build and install gcc and g++ as part of the normal build process. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_GCC Set to build and install gcc and g++. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_GCC_BOOTSTRAP Set to not build gcc and g++ as part of the bootstrap process. You must enable either gcc or clang bootstrap to be able to build the system, unless an alternative compiler is provided via XCC. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_GCC_BOOTSTRAP Set to build gcc and g++ as part of the bootstrap process. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_GCOV Set to not build the .Xr gcov 1 tool. .It Va WITHOUT_GDB Set to not build .Xr gdb 1 . .Pp This is a default setting on -amd64/amd64, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and riscv/riscv64sf. +arm64/aarch64, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_GDB Set to build .Xr gdb 1 . .Pp This is a default setting on +amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. +.It Va WITHOUT_GDB_LIBEXEC +Set to install +.Xr gdb 1 +into +.Pa /usr/bin . +.Pp +This is a default setting on arm/arm, arm/armeb, arm/armv6 and sparc64/sparc64. +.It Va WITH_GDB_LIBEXEC +Set to install +.Xr gdb 1 +into +.Pa /usr/libexec . +This permits +.Xr gdb 1 +to be used as a fallback for +.Xr crashinfo 8 +if a newer version is not installed. +.Pp +This is a default setting on +amd64/amd64, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_GNUCXX Do not build the GNU C++ stack (g++, libstdc++).
This is the default on platforms where clang is the system compiler. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_GNUCXX Build the GNU C++ stack (g++, libstdc++). This is the default on platforms where gcc is the system compiler. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_GNU_DIFF Set to not build GNU .Xr diff 1 and .Xr diff3 1 . .It Va WITHOUT_GNU_GREP Set to not build GNU .Xr grep 1 . .It Va WITH_GNU_GREP_COMPAT Set this option to include GNU extensions in .Xr bsdgrep 1 by linking against libgnuregex. .It Va WITHOUT_GPIO Set to not build .Xr gpioctl 8 as part of the base system. .It Va WITHOUT_GPL_DTC Set to build the BSD licensed version of the device tree compiler rather than the GPLed one from elinux.org. .It Va WITHOUT_GROFF Set to not build .Xr groff 1 and .Xr vgrind 1 . You should consider installing the textproc/groff port to not break .Xr man 1 . .Pp This is a default setting on riscv/riscv64 and riscv/riscv64sf. .It Va WITH_GROFF Set to build .Xr groff 1 and .Xr vgrind 1 . .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_GSSAPI Set to not build libgssapi. .It Va WITHOUT_HAST Set to not build .Xr hastd 8 and related utilities. .It Va WITH_HESIOD Set to build Hesiod support. .It Va WITHOUT_HTML Set to not build HTML docs. .It Va WITHOUT_HYPERV Set to not build or install HyperV utilities. .It Va WITHOUT_ICONV Set to not build iconv as part of libc. .It Va WITHOUT_INCLUDES Set to not install header files. This option used to be spelled .Va NO_INCS . .Bf -symbolic The option does not work for build targets. .Ef .It Va WITHOUT_INET Set to not build programs and libraries related to IPv4 networking. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_INET_SUPPORT .El .It Va WITHOUT_INET6 Set to not build programs and libraries related to IPv6 networking. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_INET6_SUPPORT .El .It Va WITHOUT_INET6_SUPPORT Set to build libraries, programs, and kernel modules without IPv6 support. .It Va WITHOUT_INETD Set to not build .Xr inetd 8 . .It Va WITHOUT_INET_SUPPORT Set to build libraries, programs, and kernel modules without IPv4 support. .It Va WITHOUT_INSTALLLIB Set this to not install optional libraries. For example, when creating a .Xr nanobsd 8 image. .Bf -symbolic The option does not work for build targets. .Ef .It Va WITH_INSTALL_AS_USER Set to make install targets succeed for non-root users by installing files with owner and group attributes set to that of the user running the .Xr make 1 command. The user still must set the .Va DESTDIR variable to point to a directory where the user has write permissions. .It Va WITHOUT_IPFILTER Set to not build IP Filter package. .It Va WITHOUT_IPFW Set to not build IPFW tools. .It Va WITHOUT_IPSEC_SUPPORT Set to not build the kernel with .Xr ipsec 4 support. This option is needed for .Xr ipsec 4 and .Xr tcpmd5 4 . 
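.Pp
For example (illustrative), a build for systems that never use
.Xr ipsec 4
could set:
.Bd -literal -offset indent
WITHOUT_IPSEC_SUPPORT=
.Ed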
.It Va WITHOUT_ISCSI Set to not build .Xr iscsid 8 and related utilities. .It Va WITHOUT_JAIL Set to not build tools for the support of jails; e.g., .Xr jail 8 . .It Va WITHOUT_KDUMP Set to not build .Xr kdump 1 and .Xr truss 1 . .It Va WITHOUT_KERBEROS Set this to not build Kerberos 5 (KTH Heimdal). When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS_SUPPORT .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITHOUT_KERBEROS_SUPPORT Set to build some programs without Kerberos support, like .Xr ssh 1 , .Xr telnet 1 , .Xr sshd 8 , and .Xr telnetd 8 . .It Va WITHOUT_KERNEL_SYMBOLS Set to not install kernel symbol files. .Bf -symbolic This option is recommended for systems with small root partitions. .Ef .It Va WITHOUT_KVM Set to not build the .Nm libkvm library as a part of the base system. .Bf -symbolic The option has no effect yet. .Ef When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_KVM_SUPPORT .El .It Va WITHOUT_KVM_SUPPORT Set to build some programs without optional .Nm libkvm support. .It Va WITHOUT_LDNS Setting this variable will prevent the LDNS library from being built. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_LDNS_UTILS .It .Va WITHOUT_UNBOUND .El .It Va WITHOUT_LDNS_UTILS Setting this variable will prevent building the LDNS utilities .Xr drill 1 and .Xr host 1 . .It Va WITHOUT_LEGACY_CONSOLE Set to not build programs that support a legacy PC console; e.g., .Xr kbdcontrol 1 and .Xr vidcontrol 1 . .It Va WITHOUT_LIB32 On 64-bit platforms, set to not build the 32-bit library set and the .Nm ld-elf32.so.1 runtime linker. .It Va WITHOUT_LIBCPLUSPLUS Set to avoid building libcxxrt and libc++. .It Va WITHOUT_LIBPTHREAD Set to not build the .Nm libpthread providing library, .Nm libthr . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_LIBTHR .El .It Va WITH_LIBSOFT On armv6 only, set to enable soft float ABI compatibility libraries. This option is for transitioning to the new hard float ABI. .It Va WITHOUT_LIBTHR Set to not build the .Nm libthr (1:1 threading) library. .It Va WITHOUT_LLD Set to not build LLVM's lld linker. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. -When set, it enforces these options: -.Pp -.Bl -item -compact -.It -.Va WITHOUT_LLD_IS_LD -.El .It Va WITH_LLD Set to build LLVM's lld linker. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64 and i386/i386. .It Va WITHOUT_LLDB Set to not build the LLDB debugger. .Pp This is a default setting on arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_LLDB Set to build the LLDB debugger. .Pp This is a default setting on amd64/amd64 and arm64/aarch64. .It Va WITHOUT_LLD_BOOTSTRAP Set to not build the LLD linker during the bootstrap phase of the build. To be able to build the system, either Binutils or LLD bootstrap must be enabled unless an alternate linker is provided via XLD.
.Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_LLD_BOOTSTRAP Set to build the LLD linker during the bootstrap phase of the build. .Pp This is a default setting on arm64/aarch64. .It Va WITHOUT_LLD_IS_LD Set to use GNU binutils ld as the system linker, instead of LLVM's LLD. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_LLD_IS_LD Set to use LLVM's LLD as the system linker, instead of GNU binutils ld. .Pp This is a default setting on arm64/aarch64. -When set, these options are also in effect: -.Pp -.Bl -inset -compact -.It Va WITHOUT_SYSTEM_COMPILER -(unless -.Va WITH_SYSTEM_COMPILER -is set explicitly) -.El .It Va WITHOUT_LLVM_LIBUNWIND Set to use GCC's stack unwinder (instead of LLVM's libunwind). .Pp This is a default setting on arm/arm, arm/armeb, arm/armv6, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITH_LLVM_LIBUNWIND Set to use LLVM's libunwind stack unwinder (instead of GCC's unwinder). .Pp This is a default setting on amd64/amd64, arm64/aarch64, i386/i386, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_LOCALES Set to not build localization files; see .Xr locale 1 . .It Va WITHOUT_LOCATE Set to not build .Xr locate 1 and related programs. .It Va WITHOUT_LPR Set to not build .Xr lpr 1 and related programs. .It Va WITHOUT_LS_COLORS Set to build .Xr ls 1 without support for colors to distinguish file types. .It Va WITHOUT_LZMA_SUPPORT Set to build some programs without optional lzma compression support. .It Va WITHOUT_MAIL Set to not build any mail support (MUA or MTA). When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_DMAGENT .It .Va WITHOUT_MAILWRAPPER .It .Va WITHOUT_SENDMAIL .El .It Va WITHOUT_MAILWRAPPER Set to not build the .Xr mailwrapper 8 MTA selector. .It Va WITHOUT_MAKE Set to not install .Xr make 1 and related support files. .It Va WITHOUT_MAN Set to not build manual pages. When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_MAN_UTILS (unless .Va WITH_MAN_UTILS is set explicitly) .El .It Va WITHOUT_MANCOMPRESS Set to not install compressed man pages. Only the uncompressed versions will be installed. .It Va WITHOUT_MAN_UTILS Set to not build utilities for manual pages, .Xr apropos 1 , .Xr catman 1 , .Xr makewhatis 1 , .Xr man 1 , .Xr whatis 1 , .Xr manctl 8 , and related support files. .It Va WITH_META_MODE Create .Xr make 1 meta files when building, which can provide a reliable incremental build when using .Xr filemon 4 . The meta file is created in OBJDIR as .Pa target.meta . These meta files track the command that was executed, its output, and the current directory. The .Xr filemon 4 module is required unless .Va NO_FILEMON is defined. When the module is loaded, any files used by the commands executed are tracked as dependencies for the target in its meta file.
The target is considered out-of-date and rebuilt if any of these conditions are true compared to the last build: .Bl -bullet -compact .It The command to execute changes. .It The current working directory changes. .It The target's meta file is missing. .It The target's meta file lacks filemon data while filemon is currently loaded, because a previous run did not have it loaded. .It [requires .Xr filemon 4 ] Files read, executed or linked to are newer than the target. .It [requires .Xr filemon 4 ] Files read, written, executed or linked are missing. .El The meta files can also be useful for debugging. .Pp The build hides commands that are executed unless .Va NO_SILENT is defined. Errors cause .Xr make 1 to show some of its environment for further debugging. .Pp The build operates as it normally would otherwise. This option originally invoked a different build system but that was renamed to .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_NAND Set to build the NAND Flash components. .It Va WITHOUT_NDIS Set to not build programs and libraries related to NDIS emulation support. .It Va WITHOUT_NETCAT Set to not build the .Xr nc 1 utility. .It Va WITHOUT_NETGRAPH Set to not build applications to support .Xr netgraph 4 . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_ATM .It .Va WITHOUT_BLUETOOTH .It .Va WITHOUT_NETGRAPH_SUPPORT .El .It Va WITHOUT_NETGRAPH_SUPPORT Set to build libraries, programs, and kernel modules without netgraph support. .It Va WITHOUT_NIS Set to not build .Xr NIS 8 support and related programs. If set, you might need to adapt your .Xr nsswitch.conf 5 and remove .Sq nis entries. .It Va WITHOUT_NLS Set to not build NLS catalogs. .It Va WITHOUT_NLS_CATALOGS Set to not build NLS catalog support for .Xr csh 1 . .It Va WITHOUT_NS_CACHING Set to disable name caching in the .Pa nsswitch subsystem. The generic caching daemon, .Xr nscd 8 , will not be built either if this option is set. .It Va WITHOUT_NTP Set to not build .Xr ntpd 8 and related programs. .It Va WITH_OFED Set to build the .Dq "OpenFabrics Enterprise Distribution" InfiniBand software stack. .It Va WITH_OPENLDAP Set to build OpenLDAP support for Kerberos. .It Va WITHOUT_OPENSSH Set to not build OpenSSH. .It Va WITHOUT_OPENSSL Set to not build OpenSSL. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_KERBEROS .It .Va WITHOUT_KERBEROS_SUPPORT .It .Va WITHOUT_OPENSSH .El .Pp When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITHOUT_GSSAPI (unless .Va WITH_GSSAPI is set explicitly) .El .It Va WITHOUT_PAM Set to not build the PAM library and modules. .Bf -symbolic This option is deprecated and does nothing. .Ef When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_PAM_SUPPORT .El .It Va WITHOUT_PAM_SUPPORT Set to build some programs without PAM support, particularly .Xr ftpd 8 and .Xr ppp 8 . .It Va WITHOUT_PC_SYSINSTALL Set to not build .Xr pc-sysinstall 8 and related programs. .It Va WITHOUT_PF Set to not build the PF firewall package. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_AUTHPF .El .It Va WITHOUT_PKGBOOTSTRAP Set to not build the .Xr pkg 7 bootstrap tool. .It Va WITHOUT_PMC Set to not build .Xr pmccontrol 8 and related programs. .It Va WITHOUT_PORTSNAP Set to not build or install .Xr portsnap 8 and related files. .It Va WITHOUT_PPP Set to not build .Xr ppp 8 and related programs.
.It Va WITHOUT_PROFILE Set to not build profiled libraries for use with .Xr gprof 8 . .Pp This is a default setting on riscv/riscv64 and riscv/riscv64sf. .It Va WITH_PROFILE Set to build profiled libraries for use with .Xr gprof 8 . .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_QUOTAS Set to not build .Xr quota 1 and related programs. .It Va WITHOUT_RADIUS_SUPPORT Set to not build RADIUS support into various applications, like .Xr pam_radius 8 and .Xr ppp 8 . .It Va WITH_RATELIMIT Set to build the system with rate limit support. .Pp This makes .Dv SO_MAX_PACING_RATE effective in .Xr getsockopt 2 , and .Ar txrlimit support in .Xr ifconfig 8 , by proxy. .It Va WITHOUT_RBOOTD Set to not build or install .Xr rbootd 8 . .It Va WITHOUT_RCMDS Disable building of the .Bx r-commands. This includes .Xr rlogin 1 , .Xr rsh 1 , etc. .It Va WITH_REPRODUCIBLE_BUILD Set to exclude build metadata (such as the build time, user, or host) from the kernel, boot loaders, and uname output, so that builds produce bit-for-bit identical output. .It Va WITHOUT_RESCUE Set to not build .Xr rescue 8 . .It Va WITHOUT_ROUTED Set to not build the .Xr routed 8 utility. .It Va WITH_RPCBIND_WARMSTART_SUPPORT Set to build .Xr rpcbind 8 with warmstart support. .It Va WITHOUT_SENDMAIL Set to not build .Xr sendmail 8 and related programs. .It Va WITHOUT_SETUID_LOGIN Set this to disable the installation of .Xr login 1 as a set-user-ID root program. .It Va WITHOUT_SHAREDOCS Set to not build the .Bx 4.4 legacy docs. .It Va WITH_SHARED_TOOLCHAIN Set to build the toolchain binaries shared. The set includes .Xr cc 1 , .Xr make 1 and necessary utilities like the assembler, linker, and library archive manager. .It Va WITH_SORT_THREADS Set to enable threads in .Xr sort 1 . .It Va WITHOUT_SOURCELESS Set to not build kernel modules that include sourceless code (either microcode or native code for the host CPU). When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_SOURCELESS_HOST .It .Va WITHOUT_SOURCELESS_UCODE .El .It Va WITHOUT_SOURCELESS_HOST Set to not build kernel modules that include sourceless native code for the host CPU. .It Va WITHOUT_SOURCELESS_UCODE Set to not build kernel modules that include sourceless microcode. .It Va WITHOUT_SSP Set to not build world with ProPolice stack-smashing protection. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf and mips/mips64hf. .It Va WITH_SSP Set to build world with ProPolice stack-smashing protection. .Pp This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, arm64/aarch64, i386/i386, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITH_STAGING Enable staging of files to a stage tree. This is best thought of as an auto-install to .Va DESTDIR with some extra metadata to ensure dependencies can be tracked. Depends on .Va WITH_DIRDEPS_BUILD .
When set, these options are also in effect: .Pp .Bl -inset -compact .It Va WITH_STAGING_MAN (unless .Va WITHOUT_STAGING_MAN is set explicitly) .It Va WITH_STAGING_PROG (unless .Va WITHOUT_STAGING_PROG is set explicitly) .El .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITH_STAGING_MAN Enable staging of man pages to the stage tree. .It Va WITH_STAGING_PROG Enable staging of PROGs to the stage tree. .It Va WITH_STALE_STAGED Check that staged files are not stale. .It Va WITH_SVN Set to install .Xr svnlite 1 as .Xr svn 1 . .It Va WITHOUT_SVNLITE Set to not build .Xr svnlite 1 and related programs. .It Va WITHOUT_SYMVER Set to disable symbol versioning when building shared libraries. .It Va WITHOUT_SYSCONS Set to not build .Xr syscons 4 support files such as keyboard maps, fonts, and screen output maps. .It Va WITH_SYSROOT Enable use of a sysroot during the build. Depends on .Va WITH_DIRDEPS_BUILD . .Pp This must be set in the environment, make command line, or .Pa /etc/src-env.conf , not .Pa /etc/src.conf . .It Va WITHOUT_SYSTEM_COMPILER Set to not opportunistically skip building a cross-compiler during the bootstrap phase of the build. Normally, if the currently installed compiler matches the planned bootstrap compiler type and revision, then it will not be built. This does not prevent a compiler from being built for installation, only the building of one for use during the build itself. The .Va WITHOUT_CLANG and .Va WITHOUT_GCC options control those. -.Pp -This is a default setting on -arm64/aarch64. -.It Va WITH_SYSTEM_COMPILER -Set to opportunistically skip building a cross-compiler during the -bootstrap phase of the build. -If the currently installed compiler matches the planned bootstrap compiler -type and revision, then it will not be built. -This does not prevent a compiler from being built for installation though, -only for building one for the build itself. -The -.Va WITHOUT_CLANG -and -.Va WITHOUT_GCC -options control those. -.Pp -This is a default setting on -amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITHOUT_TALK Set to not build or install .Xr talk 1 and .Xr talkd 8 . .It Va WITHOUT_TCP_WRAPPERS Set to not build or install .Xr tcpd 8 and related utilities. .It Va WITHOUT_TCSH Set to not build and install .Pa /bin/csh (which is .Xr tcsh 1 ) . .It Va WITHOUT_TELNET Set to not build .Xr telnet 1 and related programs. .It Va WITHOUT_TESTS Set to not build or install the .Fx Test Suite in .Pa /usr/tests/ . See .Xr tests 7 for more details. This also disables the build of all test-related dependencies, including ATF. .Pp This is a default setting on riscv/riscv64 and riscv/riscv64sf. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_DTRACE_TESTS .It .Va WITHOUT_TESTS_SUPPORT .El .It Va WITHOUT_TESTS_SUPPORT Set to disable the build of all test-related dependencies, including ATF. .Pp This is a default setting on riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_TEXTPROC Set to not build programs used for text processing. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_GROFF .El .It Va WITHOUT_TFTP Set to not build or install .Xr tftp 1 and .Xr tftpd 8 . .It Va WITHOUT_TIMED Set to not build or install .Xr timed 8 .
.It Va WITHOUT_TOOLCHAIN Set to not install headers or programs used for program development, such as compilers and debuggers. When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_BINUTILS .It .Va WITHOUT_CLANG .It .Va WITHOUT_CLANG_EXTRAS .It .Va WITHOUT_CLANG_FULL .It .Va WITHOUT_GCC .It .Va WITHOUT_GDB .It .Va WITHOUT_INCLUDES .It .Va WITHOUT_LLD .It .Va WITHOUT_LLDB .El .It Va WITHOUT_UNBOUND Set to not build .Xr unbound 8 and related programs. .It Va WITHOUT_USB Set to not build USB-related programs and libraries. .It Va WITHOUT_USB_GADGET_EXAMPLES Set to not build USB gadget kernel modules. .It Va WITHOUT_UTMPX Set to not build user accounting tools such as .Xr last 1 , .Xr users 1 , .Xr who 1 , .Xr ac 8 , .Xr lastlogin 8 and .Xr utx 8 . .It Va WITHOUT_VI Set to not build and install vi, view, ex and related programs. .It Va WITHOUT_VT Set to not build .Xr vt 4 support files (fonts and keymaps). .It Va WITHOUT_WARNS Set this to not add warning flags to the compiler invocations. Useful as a temporary workaround when code enters the tree that triggers warnings in environments that differ from the original developer's. .It Va WITHOUT_WIRELESS Set to not build programs used for 802.11 wireless networks; especially .Xr wpa_supplicant 8 and .Xr hostapd 8 . When set, it enforces these options: .Pp .Bl -item -compact .It .Va WITHOUT_WIRELESS_SUPPORT .El .It Va WITHOUT_WIRELESS_SUPPORT Set to build libraries, programs, and kernel modules without 802.11 wireless support. .It Va WITHOUT_WPA_SUPPLICANT_EAPOL Build .Xr wpa_supplicant 8 without support for the IEEE 802.1X protocol and without support for EAP-PEAP, EAP-TLS, EAP-LEAP, and EAP-TTLS protocols (usable only via 802.1X). .It Va WITHOUT_ZFS Set to not build the ZFS file system. .It Va WITHOUT_ZONEINFO Set to not build the timezone database. .El .Sh FILES .Bl -tag -compact -width Pa .It Pa /etc/src.conf .It Pa /etc/src-env.conf .It Pa /usr/share/mk/bsd.own.mk .El .Sh SEE ALSO .Xr make 1 , .Xr make.conf 5 , .Xr build 7 , .Xr ports 7 .Sh HISTORY The .Nm file appeared in .Fx 7.0 . .Sh AUTHORS This manual page was autogenerated by .An tools/build/options/makeman . Index: projects/clang500-import/sys/compat/linuxkpi/common/include/linux/compiler.h =================================================================== --- projects/clang500-import/sys/compat/linuxkpi/common/include/linux/compiler.h (revision 319250) +++ projects/clang500-import/sys/compat/linuxkpi/common/include/linux/compiler.h (revision 319251) @@ -1,102 +1,103 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. * Copyright (c) 2015 François Tigeot * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_COMPILER_H_ #define _LINUX_COMPILER_H_ #include #define __user #define __kernel #define __safe #define __force #define __nocast #define __iomem #define __chk_user_ptr(x) 0 #define __chk_io_ptr(x) 0 #define __builtin_warning(x, y...) (1) #define __acquires(x) #define __releases(x) #define __acquire(x) 0 #define __release(x) 0 #define __cond_lock(x,c) (c) #define __bitwise #define __devinitdata #define __deprecated #define __init #define __devinit #define __devexit #define __exit #define __rcu +#define __malloc #define ___stringify(...) #__VA_ARGS__ #define __stringify(...) ___stringify(__VA_ARGS__) #define __attribute_const__ __attribute__((__const__)) #undef __always_inline #define __always_inline inline #define ____cacheline_aligned __aligned(CACHE_LINE_SIZE) #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) #define typeof(x) __typeof(x) #define uninitialized_var(x) x = x #define __always_unused __unused #define __must_check __result_use_check #define __printf(a,b) __printflike(a,b) #define barrier() __asm__ __volatile__("": : :"memory") #define ___PASTE(a,b) a##b #define __PASTE(a,b) ___PASTE(a,b) #define ACCESS_ONCE(x) (*(volatile __typeof(x) *)&(x)) #define WRITE_ONCE(x,v) do { \ barrier(); \ ACCESS_ONCE(x) = (v); \ barrier(); \ } while (0) #define READ_ONCE(x) ({ \ __typeof(x) __var = ({ \ barrier(); \ ACCESS_ONCE(x); \ }); \ barrier(); \ __var; \ }) #define lockless_dereference(p) READ_ONCE(p) #define _AT(T,X) ((T)(X)) #endif /* _LINUX_COMPILER_H_ */ Index: projects/clang500-import/sys/compat/linuxkpi/common/include/linux/device.h =================================================================== --- projects/clang500-import/sys/compat/linuxkpi/common/include/linux/device.h (revision 319250) +++ projects/clang500-import/sys/compat/linuxkpi/common/include/linux/device.h (revision 319251) @@ -1,493 +1,510 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_DEVICE_H_ #define _LINUX_DEVICE_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include enum irqreturn { IRQ_NONE = 0, IRQ_HANDLED, IRQ_WAKE_THREAD, }; typedef enum irqreturn irqreturn_t; struct device; struct class { const char *name; struct module *owner; struct kobject kobj; devclass_t bsdclass; void (*class_release)(struct class *class); void (*dev_release)(struct device *dev); char * (*devnode)(struct device *dev, umode_t *mode); }; +struct dev_pm_ops { + int (*suspend)(struct device *dev); + int (*suspend_late)(struct device *dev); + int (*resume)(struct device *dev); + int (*resume_early)(struct device *dev); + int (*freeze)(struct device *dev); + int (*freeze_late)(struct device *dev); + int (*thaw)(struct device *dev); + int (*poweroff)(struct device *dev); + int (*poweroff_late)(struct device *dev); + int (*restore)(struct device *dev); + int (*restore_early)(struct device *dev); + int (*runtime_suspend)(struct device *dev); + int (*runtime_resume)(struct device *dev); + int (*runtime_idle)(struct device *dev); +}; + struct device { struct device *parent; struct list_head irqents; device_t bsddev; /* * The following flag is used to determine if the LinuxKPI is * responsible for detaching the BSD device or not. If the * LinuxKPI got the BSD device using devclass_get_device(), it * must not try to detach or delete it, because it's already * done somewhere else. 
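* Conversely, if the LinuxKPI created the BSD device itself via * device_add_child(), this flag is set, and device_unregister() and * device_del() below delete the child again.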
*/ bool bsddev_attached_here; dev_t devt; struct class *class; void (*release)(struct device *dev); struct kobject kobj; uint64_t *dma_mask; void *driver_data; unsigned int irq; #define LINUX_IRQ_INVALID 65535 unsigned int msix; unsigned int msix_max; const struct attribute_group **groups; }; extern struct device linux_root_device; extern struct kobject linux_class_root; extern const struct kobj_type linux_dev_ktype; extern const struct kobj_type linux_class_ktype; struct class_attribute { struct attribute attr; ssize_t (*show)(struct class *, struct class_attribute *, char *); ssize_t (*store)(struct class *, struct class_attribute *, const char *, size_t); const void *(*namespace)(struct class *, const struct class_attribute *); }; #define CLASS_ATTR(_name, _mode, _show, _store) \ struct class_attribute class_attr_##_name = \ { { #_name, NULL, _mode }, _show, _store } struct device_attribute { struct attribute attr; ssize_t (*show)(struct device *, struct device_attribute *, char *); ssize_t (*store)(struct device *, struct device_attribute *, const char *, size_t); }; #define DEVICE_ATTR(_name, _mode, _show, _store) \ struct device_attribute dev_attr_##_name = \ { { #_name, NULL, _mode }, _show, _store } /* Simple class attribute that is just a static string */ struct class_attribute_string { struct class_attribute attr; char *str; }; static inline ssize_t show_class_attr_string(struct class *class, struct class_attribute *attr, char *buf) { struct class_attribute_string *cs; cs = container_of(attr, struct class_attribute_string, attr); return snprintf(buf, PAGE_SIZE, "%s\n", cs->str); } /* Currently read-only only */ #define _CLASS_ATTR_STRING(_name, _mode, _str) \ { __ATTR(_name, _mode, show_class_attr_string, NULL), _str } #define CLASS_ATTR_STRING(_name, _mode, _str) \ struct class_attribute_string class_attr_##_name = \ _CLASS_ATTR_STRING(_name, _mode, _str) #define dev_err(dev, fmt, ...) device_printf((dev)->bsddev, fmt, ##__VA_ARGS__) #define dev_warn(dev, fmt, ...) device_printf((dev)->bsddev, fmt, ##__VA_ARGS__) #define dev_info(dev, fmt, ...) device_printf((dev)->bsddev, fmt, ##__VA_ARGS__) #define dev_notice(dev, fmt, ...) device_printf((dev)->bsddev, fmt, ##__VA_ARGS__) #define dev_printk(lvl, dev, fmt, ...) \ device_printf((dev)->bsddev, fmt, ##__VA_ARGS__) #define dev_err_ratelimited(dev, ...) do { \ static linux_ratelimit_t __ratelimited; \ if (linux_ratelimited(&__ratelimited)) \ dev_err(dev, __VA_ARGS__); \ } while (0) #define dev_warn_ratelimited(dev, ...) do { \ static linux_ratelimit_t __ratelimited; \ if (linux_ratelimited(&__ratelimited)) \ dev_warn(dev, __VA_ARGS__); \ } while (0) static inline void * dev_get_drvdata(const struct device *dev) { return dev->driver_data; } static inline void dev_set_drvdata(struct device *dev, void *data) { dev->driver_data = data; } static inline struct device * get_device(struct device *dev) { if (dev) kobject_get(&dev->kobj); return (dev); } static inline char * dev_name(const struct device *dev) { return kobject_name(&dev->kobj); } #define dev_set_name(_dev, _fmt, ...) 
\ kobject_set_name(&(_dev)->kobj, (_fmt), ##__VA_ARGS__) static inline void put_device(struct device *dev) { if (dev) kobject_put(&dev->kobj); } static inline int class_register(struct class *class) { class->bsdclass = devclass_create(class->name); kobject_init(&class->kobj, &linux_class_ktype); kobject_set_name(&class->kobj, class->name); kobject_add(&class->kobj, &linux_class_root, class->name); return (0); } static inline void class_unregister(struct class *class) { kobject_put(&class->kobj); } static inline struct device *kobj_to_dev(struct kobject *kobj) { return container_of(kobj, struct device, kobj); } /* * Devices are registered and created for exporting to sysfs. Create * implies register and register assumes the device fields have been * setup appropriately before being called. */ static inline void device_initialize(struct device *dev) { device_t bsddev = NULL; int unit = -1; if (dev->devt) { unit = MINOR(dev->devt); bsddev = devclass_get_device(dev->class->bsdclass, unit); dev->bsddev_attached_here = false; } else if (dev->parent == NULL) { bsddev = devclass_get_device(dev->class->bsdclass, 0); dev->bsddev_attached_here = false; } else { dev->bsddev_attached_here = true; } if (bsddev == NULL && dev->parent != NULL) { bsddev = device_add_child(dev->parent->bsddev, dev->class->kobj.name, unit); } if (bsddev != NULL) device_set_softc(bsddev, dev); dev->bsddev = bsddev; MPASS(dev->bsddev != NULL); kobject_init(&dev->kobj, &linux_dev_ktype); } static inline int device_add(struct device *dev) { if (dev->bsddev != NULL) { if (dev->devt == 0) dev->devt = makedev(0, device_get_unit(dev->bsddev)); } kobject_add(&dev->kobj, &dev->class->kobj, dev_name(dev)); return (0); } static inline void device_create_release(struct device *dev) { kfree(dev); } static inline struct device * device_create_groups_vargs(struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, va_list args) { struct device *dev = NULL; int retval = -ENODEV; if (class == NULL || IS_ERR(class)) goto error; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { retval = -ENOMEM; goto error; } dev->devt = devt; dev->class = class; dev->parent = parent; dev->groups = groups; dev->release = device_create_release; /* device_initialize() needs the class and parent to be set */ device_initialize(dev); dev_set_drvdata(dev, drvdata); retval = kobject_set_name_vargs(&dev->kobj, fmt, args); if (retval) goto error; retval = device_add(dev); if (retval) goto error; return dev; error: put_device(dev); return ERR_PTR(retval); } static inline struct device * device_create_with_groups(struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, ...) 
{ va_list vargs; struct device *dev; va_start(vargs, fmt); dev = device_create_groups_vargs(class, parent, devt, drvdata, groups, fmt, vargs); va_end(vargs); return dev; } static inline int device_register(struct device *dev) { device_t bsddev = NULL; int unit = -1; if (dev->bsddev != NULL) goto done; if (dev->devt) { unit = MINOR(dev->devt); bsddev = devclass_get_device(dev->class->bsdclass, unit); dev->bsddev_attached_here = false; } else if (dev->parent == NULL) { bsddev = devclass_get_device(dev->class->bsdclass, 0); dev->bsddev_attached_here = false; } else { dev->bsddev_attached_here = true; } if (bsddev == NULL && dev->parent != NULL) { bsddev = device_add_child(dev->parent->bsddev, dev->class->kobj.name, unit); } if (bsddev != NULL) { if (dev->devt == 0) dev->devt = makedev(0, device_get_unit(bsddev)); device_set_softc(bsddev, dev); } dev->bsddev = bsddev; done: kobject_init(&dev->kobj, &linux_dev_ktype); kobject_add(&dev->kobj, &dev->class->kobj, dev_name(dev)); return (0); } static inline void device_unregister(struct device *dev) { device_t bsddev; bsddev = dev->bsddev; dev->bsddev = NULL; if (bsddev != NULL && dev->bsddev_attached_here) { mtx_lock(&Giant); device_delete_child(device_get_parent(bsddev), bsddev); mtx_unlock(&Giant); } put_device(dev); } static inline void device_del(struct device *dev) { device_t bsddev; bsddev = dev->bsddev; dev->bsddev = NULL; if (bsddev != NULL && dev->bsddev_attached_here) { mtx_lock(&Giant); device_delete_child(device_get_parent(bsddev), bsddev); mtx_unlock(&Giant); } } struct device *device_create(struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...); static inline void device_destroy(struct class *class, dev_t devt) { device_t bsddev; int unit; unit = MINOR(devt); bsddev = devclass_get_device(class->bsdclass, unit); if (bsddev != NULL) device_unregister(device_get_softc(bsddev)); } static inline void linux_class_kfree(struct class *class) { kfree(class); } static inline struct class * class_create(struct module *owner, const char *name) { struct class *class; int error; class = kzalloc(sizeof(*class), M_WAITOK); class->owner = owner; class->name = name; class->class_release = linux_class_kfree; error = class_register(class); if (error) { kfree(class); return (NULL); } return (class); } static inline void class_destroy(struct class *class) { if (class == NULL) return; class_unregister(class); } static inline int device_create_file(struct device *dev, const struct device_attribute *attr) { if (dev) return sysfs_create_file(&dev->kobj, &attr->attr); return -EINVAL; } static inline void device_remove_file(struct device *dev, const struct device_attribute *attr) { if (dev) sysfs_remove_file(&dev->kobj, &attr->attr); } static inline int class_create_file(struct class *class, const struct class_attribute *attr) { if (class) return sysfs_create_file(&class->kobj, &attr->attr); return -EINVAL; } static inline void class_remove_file(struct class *class, const struct class_attribute *attr) { if (class) sysfs_remove_file(&class->kobj, &attr->attr); } static inline int dev_to_node(struct device *dev) { return -1; } char *kvasprintf(gfp_t, const char *, va_list); char *kasprintf(gfp_t, const char *, ...); #endif /* _LINUX_DEVICE_H_ */ Index: projects/clang500-import/sys/compat/linuxkpi/common/include/linux/io.h =================================================================== --- projects/clang500-import/sys/compat/linuxkpi/common/include/linux/io.h (revision 319250) +++ 
projects/clang500-import/sys/compat/linuxkpi/common/include/linux/io.h (revision 319251) @@ -1,261 +1,262 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2015 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_IO_H_ #define _LINUX_IO_H_ #include #include #include #include +#include static inline uint32_t __raw_readl(const volatile void *addr) { return *(const volatile uint32_t *)addr; } static inline void __raw_writel(uint32_t b, volatile void *addr) { *(volatile uint32_t *)addr = b; } static inline uint64_t __raw_readq(const volatile void *addr) { return *(const volatile uint64_t *)addr; } static inline void __raw_writeq(uint64_t b, volatile void *addr) { *(volatile uint64_t *)addr = b; } /* * XXX This is all x86 specific. It should be bus space access. 
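* Each accessor below is a single volatile load or store with no memory * barriers; callers that need ordering against other I/O or regular * memory accesses must insert the appropriate barriers themselves.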
*/ #define mmiowb() barrier() #undef writel static inline void writel(uint32_t b, void *addr) { *(volatile uint32_t *)addr = b; } #undef writeq static inline void writeq(uint64_t b, void *addr) { *(volatile uint64_t *)addr = b; } #undef writeb static inline void writeb(uint8_t b, void *addr) { *(volatile uint8_t *)addr = b; } #undef writew static inline void writew(uint16_t b, void *addr) { *(volatile uint16_t *)addr = b; } #undef ioread8 static inline uint8_t ioread8(const volatile void *addr) { return *(const volatile uint8_t *)addr; } #undef ioread16 static inline uint16_t ioread16(const volatile void *addr) { return *(const volatile uint16_t *)addr; } #undef ioread32 static inline uint32_t ioread32(const volatile void *addr) { return *(const volatile uint32_t *)addr; } #undef ioread32be static inline uint32_t ioread32be(const volatile void *addr) { return be32toh(*(const volatile uint32_t *)addr); } #undef iowrite8 static inline void iowrite8(uint8_t v, volatile void *addr) { *(volatile uint8_t *)addr = v; } #undef iowrite16 static inline void iowrite16(uint16_t v, volatile void *addr) { *(volatile uint16_t *)addr = v; } #undef iowrite32 static inline void iowrite32(uint32_t v, volatile void *addr) { *(volatile uint32_t *)addr = v; } #undef iowrite32be static inline void iowrite32be(uint32_t v, volatile void *addr) { *(volatile uint32_t *)addr = htobe32(v); } #undef readb static inline uint8_t readb(const volatile void *addr) { return *(const volatile uint8_t *)addr; } #undef readw static inline uint16_t readw(const volatile void *addr) { return *(const volatile uint16_t *)addr; } #undef readl static inline uint32_t readl(const volatile void *addr) { return *(const volatile uint32_t *)addr; } #if defined(__i386__) || defined(__amd64__) static inline void _outb(u_char data, u_int port) { __asm __volatile("outb %0, %w1" : : "a" (data), "Nd" (port)); } #endif #if defined(__i386__) || defined(__amd64__) void *_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr); #else #define _ioremap_attr(...) 
NULL #endif #define ioremap_nocache(addr, size) \ _ioremap_attr((addr), (size), VM_MEMATTR_UNCACHEABLE) #define ioremap_wc(addr, size) \ _ioremap_attr((addr), (size), VM_MEMATTR_WRITE_COMBINING) #define ioremap_wb(addr, size) \ _ioremap_attr((addr), (size), VM_MEMATTR_WRITE_BACK) #define ioremap_wt(addr, size) \ _ioremap_attr((addr), (size), VM_MEMATTR_WRITE_THROUGH) #define ioremap(addr, size) \ _ioremap_attr((addr), (size), VM_MEMATTR_UNCACHEABLE) void iounmap(void *addr); #define memset_io(a, b, c) memset((a), (b), (c)) #define memcpy_fromio(a, b, c) memcpy((a), (b), (c)) #define memcpy_toio(a, b, c) memcpy((a), (b), (c)) static inline void __iowrite32_copy(void *to, void *from, size_t count) { uint32_t *src; uint32_t *dst; int i; for (i = 0, src = from, dst = to; i < count; i++, src++, dst++) __raw_writel(*src, dst); } static inline void __iowrite64_copy(void *to, void *from, size_t count) { #ifdef __LP64__ uint64_t *src; uint64_t *dst; int i; for (i = 0, src = from, dst = to; i < count; i++, src++, dst++) __raw_writeq(*src, dst); #else __iowrite32_copy(to, from, count * 2); #endif } enum { MEMREMAP_WB = 1 << 0, MEMREMAP_WT = 1 << 1, MEMREMAP_WC = 1 << 2, }; static inline void * memremap(resource_size_t offset, size_t size, unsigned long flags) { void *addr = NULL; if ((flags & MEMREMAP_WB) && (addr = ioremap_wb(offset, size)) != NULL) goto done; if ((flags & MEMREMAP_WT) && (addr = ioremap_wt(offset, size)) != NULL) goto done; if ((flags & MEMREMAP_WC) && (addr = ioremap_wc(offset, size)) != NULL) goto done; done: return (addr); } static inline void memunmap(void *addr) { /* XXX May need to check if this is RAM */ iounmap(addr); } #endif /* _LINUX_IO_H_ */ Index: projects/clang500-import/sys/compat/linuxkpi/common/include/linux/kernel.h =================================================================== --- projects/clang500-import/sys/compat/linuxkpi/common/include/linux/kernel.h (revision 319250) +++ projects/clang500-import/sys/compat/linuxkpi/common/include/linux/kernel.h (revision 319251) @@ -1,438 +1,442 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. * Copyright (c) 2014-2015 François Tigeot * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _LINUX_KERNEL_H_ #define _LINUX_KERNEL_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define KERN_CONT "" #define KERN_EMERG "<0>" #define KERN_ALERT "<1>" #define KERN_CRIT "<2>" #define KERN_ERR "<3>" #define KERN_WARNING "<4>" #define KERN_NOTICE "<5>" #define KERN_INFO "<6>" #define KERN_DEBUG "<7>" #define U8_MAX ((u8)~0U) #define S8_MAX ((s8)(U8_MAX >> 1)) #define S8_MIN ((s8)(-S8_MAX - 1)) #define U16_MAX ((u16)~0U) #define S16_MAX ((s16)(U16_MAX >> 1)) #define S16_MIN ((s16)(-S16_MAX - 1)) #define U32_MAX ((u32)~0U) #define S32_MAX ((s32)(U32_MAX >> 1)) #define S32_MIN ((s32)(-S32_MAX - 1)) #define U64_MAX ((u64)~0ULL) #define S64_MAX ((s64)(U64_MAX >> 1)) #define S64_MIN ((s64)(-S64_MAX - 1)) #define S8_C(x) x #define U8_C(x) x ## U #define S16_C(x) x #define U16_C(x) x ## U #define S32_C(x) x #define U32_C(x) x ## U #define S64_C(x) x ## LL #define U64_C(x) x ## ULL -#define BUILD_BUG_ON(x) CTASSERT(!(x)) +#define BUILD_BUG_ON(x) CTASSERT(!(x)) +#define BUILD_BUG_ON_MSG(x, msg) BUILD_BUG_ON(x) +#define BUILD_BUG_ON_NOT_POWER_OF_2(x) BUILD_BUG_ON(!powerof2(x)) #define BUG() panic("BUG at %s:%d", __FILE__, __LINE__) #define BUG_ON(cond) do { \ if (cond) { \ panic("BUG ON %s failed at %s:%d", \ __stringify(cond), __FILE__, __LINE__); \ } \ } while (0) #define WARN_ON(cond) ({ \ bool __ret = (cond); \ if (__ret) { \ printf("WARNING %s failed at %s:%d\n", \ __stringify(cond), __FILE__, __LINE__); \ } \ unlikely(__ret); \ }) #define WARN_ON_SMP(cond) WARN_ON(cond) #define WARN_ON_ONCE(cond) ({ \ static bool __warn_on_once; \ bool __ret = (cond); \ if (__ret && !__warn_on_once) { \ __warn_on_once = 1; \ printf("WARNING %s failed at %s:%d\n", \ __stringify(cond), __FILE__, __LINE__); \ } \ unlikely(__ret); \ }) + +#define oops_in_progress SCHEDULER_STOPPED() #undef ALIGN #define ALIGN(x, y) roundup2((x), (y)) #undef PTR_ALIGN #define PTR_ALIGN(p, a) ((__typeof(p))ALIGN((uintptr_t)(p), (a))) #define DIV_ROUND_UP(x, n) howmany(x, n) #define DIV_ROUND_UP_ULL(x, n) DIV_ROUND_UP((unsigned long long)(x), (n)) #define FIELD_SIZEOF(t, f) sizeof(((t *)0)->f) #define printk(...) printf(__VA_ARGS__) #define vprintk(f, a) vprintf(f, a) struct va_format { const char *fmt; va_list *va; }; static inline int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) { ssize_t ssize = size; int i; i = vsnprintf(buf, size, fmt, args); return ((i >= ssize) ? (ssize - 1) : i); } static inline int scnprintf(char *buf, size_t size, const char *fmt, ...) { va_list args; int i; va_start(args, fmt); i = vscnprintf(buf, size, fmt, args); va_end(args); return (i); } /* * The "pr_debug()" and "pr_devel()" macros should produce zero code * unless DEBUG is defined: */ #ifdef DEBUG #define pr_debug(fmt, ...) \ log(LOG_DEBUG, fmt, ##__VA_ARGS__) #define pr_devel(fmt, ...) \ log(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__) #else #define pr_debug(fmt, ...) \ ({ if (0) log(LOG_DEBUG, fmt, ##__VA_ARGS__); 0; }) #define pr_devel(fmt, ...) \ ({ if (0) log(LOG_DEBUG, pr_fmt(fmt), ##__VA_ARGS__); 0; }) #endif #ifndef pr_fmt #define pr_fmt(fmt) fmt #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ #define printk_once(...) do { \ static bool __print_once; \ \ if (!__print_once) { \ __print_once = true; \ printk(__VA_ARGS__); \ } \ } while (0) /* * Log a one-time message (analogous to WARN_ONCE() et al): */ #define log_once(level,...) 
do { \ static bool __log_once; \ \ if (unlikely(!__log_once)) { \ __log_once = true; \ log(level, __VA_ARGS__); \ } \ } while (0) #define pr_emerg(fmt, ...) \ log(LOG_EMERG, pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert(fmt, ...) \ log(LOG_ALERT, pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit(fmt, ...) \ log(LOG_CRIT, pr_fmt(fmt), ##__VA_ARGS__) #define pr_err(fmt, ...) \ log(LOG_ERR, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warning(fmt, ...) \ log(LOG_WARNING, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn(...) \ pr_warning(__VA_ARGS__) #define pr_warn_once(fmt, ...) \ log_once(LOG_WARNING, pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice(fmt, ...) \ log(LOG_NOTICE, pr_fmt(fmt), ##__VA_ARGS__) #define pr_info(fmt, ...) \ log(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ log_once(LOG_INFO, pr_fmt(fmt), ##__VA_ARGS__) #define pr_cont(fmt, ...) \ printk(KERN_CONT fmt, ##__VA_ARGS__) #define pr_warn_ratelimited(...) do { \ static linux_ratelimit_t __ratelimited; \ if (linux_ratelimited(&__ratelimited)) \ pr_warning(__VA_ARGS__); \ } while (0) #ifndef WARN #define WARN(condition, ...) ({ \ bool __ret_warn_on = (condition); \ if (unlikely(__ret_warn_on)) \ pr_warning(__VA_ARGS__); \ unlikely(__ret_warn_on); \ }) #endif #ifndef WARN_ONCE #define WARN_ONCE(condition, ...) ({ \ bool __ret_warn_on = (condition); \ if (unlikely(__ret_warn_on)) \ pr_warn_once(__VA_ARGS__); \ unlikely(__ret_warn_on); \ }) #endif #define container_of(ptr, type, member) \ ({ \ const __typeof(((type *)0)->member) *__p = (ptr); \ (type *)((uintptr_t)__p - offsetof(type, member)); \ }) #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static inline unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) { return (strtouq(cp, endp, base)); } static inline long long simple_strtoll(const char *cp, char **endp, unsigned int base) { return (strtoq(cp, endp, base)); } static inline unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) { return (strtoul(cp, endp, base)); } static inline long simple_strtol(const char *cp, char **endp, unsigned int base) { return (strtol(cp, endp, base)); } static inline int kstrtoul(const char *cp, unsigned int base, unsigned long *res) { char *end; *res = strtoul(cp, &end, base); if (*cp == 0 || *end != 0) return (-EINVAL); return (0); } static inline int kstrtol(const char *cp, unsigned int base, long *res) { char *end; *res = strtol(cp, &end, base); if (*cp == 0 || *end != 0) return (-EINVAL); return (0); } static inline int kstrtoint(const char *cp, unsigned int base, int *res) { char *end; long temp; *res = temp = strtol(cp, &end, base); if (*cp == 0 || *end != 0) return (-EINVAL); if (temp != (int)temp) return (-ERANGE); return (0); } static inline int kstrtouint(const char *cp, unsigned int base, unsigned int *res) { char *end; unsigned long temp; *res = temp = strtoul(cp, &end, base); if (*cp == 0 || *end != 0) return (-EINVAL); if (temp != (unsigned int)temp) return (-ERANGE); return (0); } static inline int kstrtou32(const char *cp, unsigned int base, u32 *res) { char *end; unsigned long temp; *res = temp = strtoul(cp, &end, base); if (*cp == 0 || *end != 0) return (-EINVAL); if (temp != (u32)temp) return (-ERANGE); return (0); } #define min(x, y) ((x) < (y) ? (x) : (y)) #define max(x, y) ((x) > (y) ? (x) : (y)) #define min3(a, b, c) min(a, min(b,c)) #define max3(a, b, c) max(a, max(b,c)) #define min_t(type, x, y) ({ \ type __min1 = (x); \ type __min2 = (y); \ __min1 < __min2 ? 
__min1 : __min2; }) #define max_t(type, x, y) ({ \ type __max1 = (x); \ type __max2 = (y); \ __max1 > __max2 ? __max1 : __max2; }) #define clamp_t(type, _x, min, max) min_t(type, max_t(type, _x, min), max) #define clamp(x, lo, hi) min( max(x,lo), hi) #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) /* * This looks more complex than it should be. But we need to * get the type for the ~ right in round_down (it needs to be * as wide as the result!), and we want to evaluate the macro * arguments just once each. */ #define __round_mask(x, y) ((__typeof__(x))((y)-1)) #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) #define round_down(x, y) ((x) & ~__round_mask(x, y)) #define smp_processor_id() PCPU_GET(cpuid) #define num_possible_cpus() mp_ncpus #define num_online_cpus() mp_ncpus #if defined(__i386__) || defined(__amd64__) extern bool linux_cpu_has_clflush; #define cpu_has_clflush linux_cpu_has_clflush #endif typedef struct pm_message { int event; } pm_message_t; /* Swap values of a and b */ #define swap(a, b) do { \ typeof(a) _swap_tmp = a; \ a = b; \ b = _swap_tmp; \ } while (0) #define DIV_ROUND_CLOSEST(x, divisor) (((x) + ((divisor) / 2)) / (divisor)) #define DIV_ROUND_CLOSEST_ULL(x, divisor) ({ \ __typeof(divisor) __d = (divisor); \ unsigned long long __ret = (x) + (__d) / 2; \ __ret /= __d; \ __ret; \ }) static inline uintmax_t mult_frac(uintmax_t x, uintmax_t multiplier, uintmax_t divisor) { uintmax_t q = (x / divisor); uintmax_t r = (x % divisor); return ((q * multiplier) + ((r * multiplier) / divisor)); } static inline int64_t abs64(int64_t x) { return (x < 0 ? -x : x); } typedef struct linux_ratelimit { struct timeval lasttime; int counter; } linux_ratelimit_t; static inline bool linux_ratelimited(linux_ratelimit_t *rl) { return (ppsratecheck(&rl->lasttime, &rl->counter, 1)); } #endif /* _LINUX_KERNEL_H_ */ Index: projects/clang500-import/sys/compat/linuxkpi/common/include/linux/module.h =================================================================== --- projects/clang500-import/sys/compat/linuxkpi/common/include/linux/module.h (revision 319250) +++ projects/clang500-import/sys/compat/linuxkpi/common/include/linux/module.h (revision 319251) @@ -1,102 +1,104 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013, 2014 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_MODULE_H_ #define _LINUX_MODULE_H_ #include #include #include #include #include #include #include #include #include #define MODULE_AUTHOR(name) #define MODULE_DESCRIPTION(name) #define MODULE_LICENSE(name) +#define MODULE_INFO(tag, info) +#define MODULE_FIRMWARE(firmware) #define THIS_MODULE ((struct module *)0) #define EXPORT_SYMBOL(name) #define EXPORT_SYMBOL_GPL(name) /* OFED pre-module initialization */ #define SI_SUB_OFED_PREINIT (SI_SUB_ROOT_CONF - 2) /* OFED default module initialization */ #define SI_SUB_OFED_MODINIT (SI_SUB_ROOT_CONF - 1) #include static inline void _module_run(void *arg) { void (*fn)(void); #ifdef OFED_DEBUG_INIT char name[1024]; caddr_t pc; long offset; pc = (caddr_t)arg; if (linker_search_symbol_name(pc, name, sizeof(name), &offset) != 0) printf("Running ??? (%p)\n", pc); else printf("Running %s (%p)\n", name, pc); #endif fn = arg; DROP_GIANT(); fn(); PICKUP_GIANT(); } #define module_init(fn) \ SYSINIT(fn, SI_SUB_OFED_MODINIT, SI_ORDER_FIRST, _module_run, (fn)) #define module_exit(fn) \ SYSUNINIT(fn, SI_SUB_OFED_MODINIT, SI_ORDER_SECOND, _module_run, (fn)) /* * The following two macros are a workaround for not having a module * load and unload order resolver: */ #define module_init_order(fn, order) \ SYSINIT(fn, SI_SUB_OFED_MODINIT, (order), _module_run, (fn)) #define module_exit_order(fn, order) \ SYSUNINIT(fn, SI_SUB_OFED_MODINIT, (order), _module_run, (fn)) #define module_get(module) #define module_put(module) #define try_module_get(module) 1 #endif /* _LINUX_MODULE_H_ */ Index: projects/clang500-import/sys/compat/linuxkpi/common/include/linux/pci.h =================================================================== --- projects/clang500-import/sys/compat/linuxkpi/common/include/linux/pci.h (revision 319250) +++ projects/clang500-import/sys/compat/linuxkpi/common/include/linux/pci.h (revision 319251) @@ -1,808 +1,812 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2016 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_PCI_H_ #define _LINUX_PCI_H_ #define CONFIG_PCI_MSI #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct pci_device_id { uint32_t vendor; uint32_t device; uint32_t subvendor; uint32_t subdevice; uint32_t class_mask; uintptr_t driver_data; }; #define MODULE_DEVICE_TABLE(bus, table) #define PCI_ANY_ID (-1) #define PCI_VENDOR_ID_APPLE 0x106b #define PCI_VENDOR_ID_ASUSTEK 0x1043 #define PCI_VENDOR_ID_ATI 0x1002 #define PCI_VENDOR_ID_DELL 0x1028 #define PCI_VENDOR_ID_HP 0x103c #define PCI_VENDOR_ID_IBM 0x1014 #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_VENDOR_ID_MELLANOX 0x15b3 +#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 #define PCI_VENDOR_ID_SERVERWORKS 0x1166 #define PCI_VENDOR_ID_SONY 0x104d #define PCI_VENDOR_ID_TOPSPIN 0x1867 #define PCI_VENDOR_ID_VIA 0x1106 +#define PCI_SUBVENDOR_ID_REDHAT_QUMRANET 0x1af4 +#define PCI_DEVICE_ID_ATI_RADEON_QY 0x5159 #define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 #define PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE 0x5a46 #define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 #define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c #define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 +#define PCI_SUBDEVICE_ID_QEMU 0x1100 #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) #define PCI_FUNC(devfn) ((devfn) & 0x07) #define PCI_VDEVICE(_vendor, _device) \ .vendor = PCI_VENDOR_ID_##_vendor, .device = (_device), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID #define PCI_DEVICE(_vendor, _device) \ .vendor = (_vendor), .device = (_device), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID #define to_pci_dev(n) container_of(n, struct pci_dev, dev) #define PCI_VENDOR_ID PCIR_DEVVENDOR #define PCI_COMMAND PCIR_COMMAND #define PCI_EXP_DEVCTL PCIER_DEVICE_CTL /* Device Control */ #define PCI_EXP_LNKCTL PCIER_LINK_CTL /* Link Control */ #define PCI_EXP_FLAGS_TYPE PCIEM_FLAGS_TYPE /* Device/Port type */ #define PCI_EXP_DEVCAP PCIER_DEVICE_CAP /* Device capabilities */ #define PCI_EXP_DEVSTA PCIER_DEVICE_STA /* Device Status */ #define PCI_EXP_LNKCAP PCIER_LINK_CAP /* Link Capabilities */ #define PCI_EXP_LNKSTA PCIER_LINK_STA /* Link Status */ #define PCI_EXP_SLTCAP PCIER_SLOT_CAP /* Slot Capabilities */ #define PCI_EXP_SLTCTL PCIER_SLOT_CTL /* Slot Control */ #define PCI_EXP_SLTSTA PCIER_SLOT_STA /* Slot Status */ #define PCI_EXP_RTCTL PCIER_ROOT_CTL /* Root Control */ #define PCI_EXP_RTCAP PCIER_ROOT_CAP /* Root Capabilities */ #define PCI_EXP_RTSTA PCIER_ROOT_STA /* Root Status */ #define PCI_EXP_DEVCAP2 PCIER_DEVICE_CAP2 /* Device Capabilities 2 */ #define PCI_EXP_DEVCTL2 PCIER_DEVICE_CTL2 /* Device Control 2 */ #define PCI_EXP_LNKCAP2 PCIER_LINK_CAP2 /* Link Capabilities 2 */ #define PCI_EXP_LNKCTL2 PCIER_LINK_CTL2 /* Link Control 2 */ #define PCI_EXP_LNKSTA2 PCIER_LINK_STA2 /* Link Status 2 */ #define PCI_EXP_FLAGS PCIER_FLAGS /* Capabilities register */ 
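/*
 * Illustrative sketch only (not part of this change): how a driver would
 * use the ID-matching macros defined above. The table name is hypothetical;
 * the Tavor device ID merely reuses a constant defined earlier in this
 * header, and the block is disabled so it has no effect here.
 */
#if 0
static const struct pci_device_id example_id_table[] = {
	/* Match on vendor/device; accept any subvendor/subdevice. */
	{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR) },
	{ 0, }	/* terminating entry */
};
MODULE_DEVICE_TABLE(pci, example_id_table);
#endif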
#define PCI_EXP_FLAGS_VERS PCIEM_FLAGS_VERSION /* Capability version */ #define PCI_EXP_TYPE_ROOT_PORT PCIEM_TYPE_ROOT_PORT /* Root Port */ #define PCI_EXP_TYPE_ENDPOINT PCIEM_TYPE_ENDPOINT /* Express Endpoint */ #define PCI_EXP_TYPE_LEG_END PCIEM_TYPE_LEGACY_ENDPOINT /* Legacy Endpoint */ #define PCI_EXP_TYPE_DOWNSTREAM PCIEM_TYPE_DOWNSTREAM_PORT /* Downstream Port */ #define PCI_EXP_FLAGS_SLOT PCIEM_FLAGS_SLOT /* Slot implemented */ #define PCI_EXP_TYPE_RC_EC PCIEM_TYPE_ROOT_EC /* Root Complex Event Collector */ #define PCI_EXP_LNKCAP_SLS_2_5GB 0x01 /* Supported Link Speed 2.5GT/s */ #define PCI_EXP_LNKCAP_SLS_5_0GB 0x02 /* Supported Link Speed 5.0GT/s */ #define PCI_EXP_LNKCAP_MLW 0x03f0 /* Maximum Link Width */ #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x02 /* Supported Link Speed 2.5GT/s */ #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x04 /* Supported Link Speed 5.0GT/s */ #define PCI_EXP_LNKCAP2_SLS_8_0GB 0x08 /* Supported Link Speed 8.0GT/s */ #define PCI_EXP_LNKCTL_HAWD PCIEM_LINK_CTL_HAWD #define PCI_EXP_LNKCAP_CLKPM 0x00040000 #define PCI_EXP_DEVSTA_TRPND 0x0020 #define IORESOURCE_MEM (1 << SYS_RES_MEMORY) #define IORESOURCE_IO (1 << SYS_RES_IOPORT) #define IORESOURCE_IRQ (1 << SYS_RES_IRQ) enum pci_bus_speed { PCI_SPEED_UNKNOWN = -1, PCIE_SPEED_2_5GT, PCIE_SPEED_5_0GT, PCIE_SPEED_8_0GT, }; enum pcie_link_width { PCIE_LNK_WIDTH_UNKNOWN = 0xFF, }; typedef int pci_power_t; #define PCI_D0 PCI_POWERSTATE_D0 #define PCI_D1 PCI_POWERSTATE_D1 #define PCI_D2 PCI_POWERSTATE_D2 #define PCI_D3hot PCI_POWERSTATE_D3 #define PCI_D3cold 4 #define PCI_POWER_ERROR PCI_POWERSTATE_UNKNOWN struct pci_dev; struct pci_driver { struct list_head links; char *name; const struct pci_device_id *id_table; int (*probe)(struct pci_dev *dev, const struct pci_device_id *id); void (*remove)(struct pci_dev *dev); int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ int (*resume) (struct pci_dev *dev); /* Device woken up */ void (*shutdown) (struct pci_dev *dev); /* Device shutdown */ driver_t driver; devclass_t bsdclass; const struct pci_error_handlers *err_handler; }; extern struct list_head pci_drivers; extern struct list_head pci_devices; extern spinlock_t pci_lock; #define __devexit_p(x) x struct pci_dev { struct device dev; struct list_head links; struct pci_driver *pdrv; uint64_t dma_mask; uint16_t device; uint16_t vendor; unsigned int irq; unsigned int devfn; u8 revision; }; static inline struct resource_list_entry * linux_pci_get_rle(struct pci_dev *pdev, int type, int rid) { struct pci_devinfo *dinfo; struct resource_list *rl; dinfo = device_get_ivars(pdev->dev.bsddev); rl = &dinfo->resources; return resource_list_find(rl, type, rid); } static inline struct resource_list_entry * linux_pci_get_bar(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; bar = PCIR_BAR(bar); if ((rle = linux_pci_get_rle(pdev, SYS_RES_MEMORY, bar)) == NULL) rle = linux_pci_get_rle(pdev, SYS_RES_IOPORT, bar); return (rle); } static inline struct device * linux_pci_find_irq_dev(unsigned int irq) { struct pci_dev *pdev; struct device *found; found = NULL; spin_lock(&pci_lock); list_for_each_entry(pdev, &pci_devices, links) { if (irq == pdev->dev.irq || (irq >= pdev->dev.msix && irq < pdev->dev.msix_max)) { found = &pdev->dev; break; } } spin_unlock(&pci_lock); return (found); } static inline unsigned long pci_resource_start(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = linux_pci_get_bar(pdev, bar)) == NULL) return (0); return rle->start; } static inline unsigned long 
pci_resource_len(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = linux_pci_get_bar(pdev, bar)) == NULL) return (0); return rle->count; } static inline int pci_resource_type(struct pci_dev *pdev, int bar) { struct pci_map *pm; pm = pci_find_bar(pdev->dev.bsddev, PCIR_BAR(bar)); if (!pm) return (-1); if (PCI_BAR_IO(pm->pm_value)) return (SYS_RES_IOPORT); else return (SYS_RES_MEMORY); } /* * All drivers just seem to want to inspect the type not flags. */ static inline int pci_resource_flags(struct pci_dev *pdev, int bar) { int type; type = pci_resource_type(pdev, bar); if (type < 0) return (0); return (1 << type); } static inline const char * pci_name(struct pci_dev *d) { return device_get_desc(d->dev.bsddev); } static inline void * pci_get_drvdata(struct pci_dev *pdev) { return dev_get_drvdata(&pdev->dev); } static inline void pci_set_drvdata(struct pci_dev *pdev, void *data) { dev_set_drvdata(&pdev->dev, data); } static inline int pci_enable_device(struct pci_dev *pdev) { pci_enable_io(pdev->dev.bsddev, SYS_RES_IOPORT); pci_enable_io(pdev->dev.bsddev, SYS_RES_MEMORY); return (0); } static inline void pci_disable_device(struct pci_dev *pdev) { } static inline int pci_set_master(struct pci_dev *pdev) { pci_enable_busmaster(pdev->dev.bsddev); return (0); } static inline int pci_set_power_state(struct pci_dev *pdev, int state) { pci_set_powerstate(pdev->dev.bsddev, state); return (0); } static inline int pci_clear_master(struct pci_dev *pdev) { pci_disable_busmaster(pdev->dev.bsddev); return (0); } static inline int pci_request_region(struct pci_dev *pdev, int bar, const char *res_name) { int rid; int type; type = pci_resource_type(pdev, bar); if (type < 0) return (-ENODEV); rid = PCIR_BAR(bar); if (bus_alloc_resource_any(pdev->dev.bsddev, type, &rid, RF_ACTIVE) == NULL) return (-EINVAL); return (0); } static inline void pci_release_region(struct pci_dev *pdev, int bar) { struct resource_list_entry *rle; if ((rle = linux_pci_get_bar(pdev, bar)) == NULL) return; bus_release_resource(pdev->dev.bsddev, rle->type, rle->rid, rle->res); } static inline void pci_release_regions(struct pci_dev *pdev) { int i; for (i = 0; i <= PCIR_MAX_BAR_0; i++) pci_release_region(pdev, i); } static inline int pci_request_regions(struct pci_dev *pdev, const char *res_name) { int error; int i; for (i = 0; i <= PCIR_MAX_BAR_0; i++) { error = pci_request_region(pdev, i, res_name); if (error && error != -ENODEV) { pci_release_regions(pdev); return (error); } } return (0); } static inline void pci_disable_msix(struct pci_dev *pdev) { pci_release_msi(pdev->dev.bsddev); } static inline bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar) { return (pci_resource_start(pdev, bar)); } #define PCI_CAP_ID_EXP PCIY_EXPRESS #define PCI_CAP_ID_PCIX PCIY_PCIX #define PCI_CAP_ID_AGP PCIY_AGP #define PCI_CAP_ID_PM PCIY_PMG #define PCI_EXP_DEVCTL PCIER_DEVICE_CTL #define PCI_EXP_DEVCTL_PAYLOAD PCIEM_CTL_MAX_PAYLOAD #define PCI_EXP_DEVCTL_READRQ PCIEM_CTL_MAX_READ_REQUEST #define PCI_EXP_LNKCTL PCIER_LINK_CTL #define PCI_EXP_LNKSTA PCIER_LINK_STA static inline int pci_find_capability(struct pci_dev *pdev, int capid) { int reg; if (pci_find_cap(pdev->dev.bsddev, capid, ®)) return (0); return (reg); } static inline int pci_pcie_cap(struct pci_dev *dev) { return pci_find_capability(dev, PCI_CAP_ID_EXP); } static inline int pci_read_config_byte(struct pci_dev *pdev, int where, u8 *val) { *val = (u8)pci_read_config(pdev->dev.bsddev, where, 1); return (0); } static inline int pci_read_config_word(struct pci_dev 
*pdev, int where, u16 *val) { *val = (u16)pci_read_config(pdev->dev.bsddev, where, 2); return (0); } static inline int pci_read_config_dword(struct pci_dev *pdev, int where, u32 *val) { *val = (u32)pci_read_config(pdev->dev.bsddev, where, 4); return (0); } static inline int pci_write_config_byte(struct pci_dev *pdev, int where, u8 val) { pci_write_config(pdev->dev.bsddev, where, val, 1); return (0); } static inline int pci_write_config_word(struct pci_dev *pdev, int where, u16 val) { pci_write_config(pdev->dev.bsddev, where, val, 2); return (0); } static inline int pci_write_config_dword(struct pci_dev *pdev, int where, u32 val) { pci_write_config(pdev->dev.bsddev, where, val, 4); return (0); } extern int pci_register_driver(struct pci_driver *pdrv); extern void pci_unregister_driver(struct pci_driver *pdrv); struct msix_entry { int entry; int vector; }; /* * Enable msix, positive errors indicate actual number of available * vectors. Negative errors are failures. * * NB: define added to prevent this definition of pci_enable_msix from * clashing with the native FreeBSD version. */ #define pci_enable_msix linux_pci_enable_msix static inline int pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries, int nreq) { struct resource_list_entry *rle; int error; int avail; int i; avail = pci_msix_count(pdev->dev.bsddev); if (avail < nreq) { if (avail == 0) return -EINVAL; return avail; } avail = nreq; if ((error = -pci_alloc_msix(pdev->dev.bsddev, &avail)) != 0) return error; /* * Handle case where "pci_alloc_msix()" may allocate less * interrupts than available and return with no error: */ if (avail < nreq) { pci_release_msi(pdev->dev.bsddev); return avail; } rle = linux_pci_get_rle(pdev, SYS_RES_IRQ, 1); pdev->dev.msix = rle->start; pdev->dev.msix_max = rle->start + avail; for (i = 0; i < nreq; i++) entries[i].vector = pdev->dev.msix + i; return (0); } #define pci_enable_msix_range linux_pci_enable_msix_range static inline int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec) { int nvec = maxvec; int rc; if (maxvec < minvec) return (-ERANGE); do { rc = pci_enable_msix(dev, entries, nvec); if (rc < 0) { return (rc); } else if (rc > 0) { if (rc < minvec) return (-ENOSPC); nvec = rc; } } while (rc); return (nvec); } static inline int pci_channel_offline(struct pci_dev *pdev) { return false; } static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } static inline void pci_disable_sriov(struct pci_dev *dev) { } #define DEFINE_PCI_DEVICE_TABLE(_table) \ const struct pci_device_id _table[] __devinitdata /* XXX This should not be necessary. */ #define pcix_set_mmrbc(d, v) 0 #define pcix_get_max_mmrbc(d) 0 #define pcie_set_readrq(d, v) 0 #define PCI_DMA_BIDIRECTIONAL 0 #define PCI_DMA_TODEVICE 1 #define PCI_DMA_FROMDEVICE 2 #define PCI_DMA_NONE 3 #define pci_pool dma_pool #define pci_pool_destroy(...) dma_pool_destroy(__VA_ARGS__) #define pci_pool_alloc(...) dma_pool_alloc(__VA_ARGS__) #define pci_pool_free(...) dma_pool_free(__VA_ARGS__) #define pci_pool_create(_name, _pdev, _size, _align, _alloc) \ dma_pool_create(_name, &(_pdev)->dev, _size, _align, _alloc) #define pci_free_consistent(_hwdev, _size, _vaddr, _dma_handle) \ dma_free_coherent((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _size, _vaddr, _dma_handle) #define pci_map_sg(_hwdev, _sg, _nents, _dir) \ dma_map_sg((_hwdev) == NULL ? 
NULL : &(_hwdev->dev), \ _sg, _nents, (enum dma_data_direction)_dir) #define pci_map_single(_hwdev, _ptr, _size, _dir) \ dma_map_single((_hwdev) == NULL ? NULL : &(_hwdev->dev), \ (_ptr), (_size), (enum dma_data_direction)_dir) #define pci_unmap_single(_hwdev, _addr, _size, _dir) \ dma_unmap_single((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _addr, _size, (enum dma_data_direction)_dir) #define pci_unmap_sg(_hwdev, _sg, _nents, _dir) \ dma_unmap_sg((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _sg, _nents, (enum dma_data_direction)_dir) #define pci_map_page(_hwdev, _page, _offset, _size, _dir) \ dma_map_page((_hwdev) == NULL ? NULL : &(_hwdev)->dev, _page,\ _offset, _size, (enum dma_data_direction)_dir) #define pci_unmap_page(_hwdev, _dma_address, _size, _dir) \ dma_unmap_page((_hwdev) == NULL ? NULL : &(_hwdev)->dev, \ _dma_address, _size, (enum dma_data_direction)_dir) #define pci_set_dma_mask(_pdev, mask) dma_set_mask(&(_pdev)->dev, (mask)) #define pci_dma_mapping_error(_pdev, _dma_addr) \ dma_mapping_error(&(_pdev)->dev, _dma_addr) #define pci_set_consistent_dma_mask(_pdev, _mask) \ dma_set_coherent_mask(&(_pdev)->dev, (_mask)) #define DECLARE_PCI_UNMAP_ADDR(x) DEFINE_DMA_UNMAP_ADDR(x); #define DECLARE_PCI_UNMAP_LEN(x) DEFINE_DMA_UNMAP_LEN(x); #define pci_unmap_addr dma_unmap_addr #define pci_unmap_addr_set dma_unmap_addr_set #define pci_unmap_len dma_unmap_len #define pci_unmap_len_set dma_unmap_len_set typedef unsigned int __bitwise pci_channel_state_t; typedef unsigned int __bitwise pci_ers_result_t; enum pci_channel_state { pci_channel_io_normal = 1, pci_channel_io_frozen = 2, pci_channel_io_perm_failure = 3, }; enum pci_ers_result { PCI_ERS_RESULT_NONE = 1, PCI_ERS_RESULT_CAN_RECOVER = 2, PCI_ERS_RESULT_NEED_RESET = 3, PCI_ERS_RESULT_DISCONNECT = 4, PCI_ERS_RESULT_RECOVERED = 5, }; /* PCI bus error event callbacks */ struct pci_error_handlers { pci_ers_result_t (*error_detected)(struct pci_dev *dev, enum pci_channel_state error); pci_ers_result_t (*mmio_enabled)(struct pci_dev *dev); pci_ers_result_t (*link_reset)(struct pci_dev *dev); pci_ers_result_t (*slot_reset)(struct pci_dev *dev); void (*resume)(struct pci_dev *dev); }; /* FreeBSD does not support SRIOV - yet */ static inline struct pci_dev *pci_physfn(struct pci_dev *dev) { return dev; } static inline bool pci_is_pcie(struct pci_dev *dev) { return !!pci_pcie_cap(dev); } static inline u16 pcie_flags_reg(struct pci_dev *dev) { int pos; u16 reg16; pos = pci_find_capability(dev, PCI_CAP_ID_EXP); if (!pos) return 0; pci_read_config_word(dev, pos + PCI_EXP_FLAGS, ®16); return reg16; } static inline int pci_pcie_type(struct pci_dev *dev) { return (pcie_flags_reg(dev) & PCI_EXP_FLAGS_TYPE) >> 4; } static inline int pcie_cap_version(struct pci_dev *dev) { return pcie_flags_reg(dev) & PCI_EXP_FLAGS_VERS; } static inline bool pcie_cap_has_lnkctl(struct pci_dev *dev) { int type = pci_pcie_type(dev); return pcie_cap_version(dev) > 1 || type == PCI_EXP_TYPE_ROOT_PORT || type == PCI_EXP_TYPE_ENDPOINT || type == PCI_EXP_TYPE_LEG_END; } static inline bool pcie_cap_has_devctl(const struct pci_dev *dev) { return true; } static inline bool pcie_cap_has_sltctl(struct pci_dev *dev) { int type = pci_pcie_type(dev); return pcie_cap_version(dev) > 1 || type == PCI_EXP_TYPE_ROOT_PORT || (type == PCI_EXP_TYPE_DOWNSTREAM && pcie_flags_reg(dev) & PCI_EXP_FLAGS_SLOT); } static inline bool pcie_cap_has_rtctl(struct pci_dev *dev) { int type = pci_pcie_type(dev); return pcie_cap_version(dev) > 1 || type == PCI_EXP_TYPE_ROOT_PORT || type == 
PCI_EXP_TYPE_RC_EC; } static bool pcie_capability_reg_implemented(struct pci_dev *dev, int pos) { if (!pci_is_pcie(dev)) return false; switch (pos) { case PCI_EXP_FLAGS_TYPE: return true; case PCI_EXP_DEVCAP: case PCI_EXP_DEVCTL: case PCI_EXP_DEVSTA: return pcie_cap_has_devctl(dev); case PCI_EXP_LNKCAP: case PCI_EXP_LNKCTL: case PCI_EXP_LNKSTA: return pcie_cap_has_lnkctl(dev); case PCI_EXP_SLTCAP: case PCI_EXP_SLTCTL: case PCI_EXP_SLTSTA: return pcie_cap_has_sltctl(dev); case PCI_EXP_RTCTL: case PCI_EXP_RTCAP: case PCI_EXP_RTSTA: return pcie_cap_has_rtctl(dev); case PCI_EXP_DEVCAP2: case PCI_EXP_DEVCTL2: case PCI_EXP_LNKCAP2: case PCI_EXP_LNKCTL2: case PCI_EXP_LNKSTA2: return pcie_cap_version(dev) > 1; default: return false; } } static inline int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *dst) { if (pos & 3) return -EINVAL; if (!pcie_capability_reg_implemented(dev, pos)) return -EINVAL; return pci_read_config_dword(dev, pci_pcie_cap(dev) + pos, dst); } static inline int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *dst) { if (pos & 3) return -EINVAL; if (!pcie_capability_reg_implemented(dev, pos)) return -EINVAL; return pci_read_config_word(dev, pci_pcie_cap(dev) + pos, dst); } static inline int pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val) { if (pos & 1) return -EINVAL; if (!pcie_capability_reg_implemented(dev, pos)) return 0; return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val); } static inline int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, enum pcie_link_width *width) { *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; return (0); } static inline int pci_num_vf(struct pci_dev *dev) { return (0); } #endif /* _LINUX_PCI_H_ */ Index: projects/clang500-import/sys/compat/linuxkpi/common/include/linux/preempt.h =================================================================== --- projects/clang500-import/sys/compat/linuxkpi/common/include/linux/preempt.h (revision 319250) +++ projects/clang500-import/sys/compat/linuxkpi/common/include/linux/preempt.h (revision 319251) @@ -1,37 +1,40 @@ /*- * Copyright (c) 2017 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _LINUX_PREEMPT_H_ #define _LINUX_PREEMPT_H_ #include #define in_interrupt() \ (curthread->td_intr_nesting_level || curthread->td_critnest) +#define preempt_disable() critical_enter() +#define preempt_enable() critical_exit() + #endif /* _LINUX_PREEMPT_H_ */ Index: projects/clang500-import/sys/compat/linuxkpi/common/include/linux/types.h =================================================================== --- projects/clang500-import/sys/compat/linuxkpi/common/include/linux/types.h (revision 319250) +++ projects/clang500-import/sys/compat/linuxkpi/common/include/linux/types.h (revision 319251) @@ -1,76 +1,78 @@ /*- * Copyright (c) 2010 Isilon Systems, Inc. * Copyright (c) 2010 iX Systems, Inc. * Copyright (c) 2010 Panasas, Inc. * Copyright (c) 2013-2017 Mellanox Technologies, Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _LINUX_TYPES_H_ #define _LINUX_TYPES_H_ #include #include #include #include #include #include #ifndef __bitwise__ #ifdef __CHECKER__ #define __bitwise__ __attribute__((bitwise)) #else #define __bitwise__ #endif #endif typedef uint16_t __le16; typedef uint16_t __be16; typedef uint32_t __le32; typedef uint32_t __be32; typedef uint64_t __le64; typedef uint64_t __be64; typedef unsigned int uint; typedef unsigned gfp_t; typedef uint64_t loff_t; typedef vm_paddr_t resource_size_t; typedef uint16_t __bitwise__ __sum16; typedef unsigned long pgoff_t; typedef u64 phys_addr_t; #define DECLARE_BITMAP(n, bits) \ unsigned long n[howmany(bits, sizeof(long) * 8)] +typedef unsigned long irq_hw_number_t; + struct rcu_head { void *raw[2]; } __aligned(sizeof(void *)); typedef void (*rcu_callback_t)(struct rcu_head *head); typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func); typedef int linux_task_fn_t(void *data); #endif /* _LINUX_TYPES_H_ */ Index: projects/clang500-import/sys/dev/ena/ena.c =================================================================== --- projects/clang500-import/sys/dev/ena/ena.c (revision 319250) +++ projects/clang500-import/sys/dev/ena/ena.c (revision 319251) @@ -1,3769 +1,3841 @@ /*- * BSD LICENSE * * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ena.h" #include "ena_sysctl.h" /********************************************************* * Function prototypes *********************************************************/ static int ena_probe(device_t); static void ena_intr_msix_mgmnt(void *); static int ena_allocate_pci_resources(struct ena_adapter*); static void ena_free_pci_resources(struct ena_adapter *); static int ena_change_mtu(if_t, int); static inline void ena_alloc_counters(counter_u64_t *, int); static inline void ena_free_counters(counter_u64_t *, int); static inline void ena_reset_counters(counter_u64_t *, int); static void ena_init_io_rings_common(struct ena_adapter *, struct ena_ring *, uint16_t); static int ena_init_io_rings(struct ena_adapter *); static void ena_free_io_ring_resources(struct ena_adapter *, unsigned int); static void ena_free_all_io_rings_resources(struct ena_adapter *); static int ena_setup_tx_dma_tag(struct ena_adapter *); static int ena_free_tx_dma_tag(struct ena_adapter *); static int ena_setup_rx_dma_tag(struct ena_adapter *); static int ena_free_rx_dma_tag(struct ena_adapter *); static int ena_setup_tx_resources(struct ena_adapter *, int); static void ena_free_tx_resources(struct ena_adapter *, int); static int ena_setup_all_tx_resources(struct ena_adapter *); static void ena_free_all_tx_resources(struct ena_adapter *); static int ena_setup_rx_resources(struct ena_adapter *, unsigned int); static void ena_free_rx_resources(struct ena_adapter *, unsigned int); static int ena_setup_all_rx_resources(struct ena_adapter *); static void ena_free_all_rx_resources(struct ena_adapter *); static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *, struct ena_rx_buffer *); static void ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *, struct ena_rx_buffer *); static int ena_refill_rx_bufs(struct ena_ring *, uint32_t); static void 
ena_free_rx_bufs(struct ena_adapter *, unsigned int); static void ena_refill_all_rx_bufs(struct ena_adapter *); static void ena_free_all_rx_bufs(struct ena_adapter *); static void ena_free_tx_bufs(struct ena_adapter *, unsigned int); static void ena_free_all_tx_bufs(struct ena_adapter *); static void ena_destroy_all_tx_queues(struct ena_adapter *); static void ena_destroy_all_rx_queues(struct ena_adapter *); static void ena_destroy_all_io_queues(struct ena_adapter *); static int ena_create_io_queues(struct ena_adapter *); static int ena_tx_cleanup(struct ena_ring *); static int ena_rx_cleanup(struct ena_ring *); static int validate_tx_req_id(struct ena_ring *, uint16_t); static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *, struct mbuf *); static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *, struct ena_com_rx_ctx *, uint16_t *); static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *, struct mbuf *); static void ena_handle_msix(void *); static int ena_enable_msix(struct ena_adapter *); static void ena_setup_mgmnt_intr(struct ena_adapter *); static void ena_setup_io_intr(struct ena_adapter *); static int ena_request_mgmnt_irq(struct ena_adapter *); static int ena_request_io_irq(struct ena_adapter *); static void ena_free_mgmnt_irq(struct ena_adapter *); static void ena_free_io_irq(struct ena_adapter *); static void ena_free_irqs(struct ena_adapter*); static void ena_disable_msix(struct ena_adapter *); static void ena_unmask_all_io_irqs(struct ena_adapter *); static int ena_rss_configure(struct ena_adapter *); +static void ena_update_hw_stats(void *, int); static int ena_up_complete(struct ena_adapter *); static int ena_up(struct ena_adapter *); static void ena_down(struct ena_adapter *); static uint64_t ena_get_counter(if_t, ift_counter); static int ena_media_change(if_t); static void ena_media_status(if_t, struct ifmediareq *); static void ena_init(void *); static int ena_ioctl(if_t, u_long, caddr_t); static int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *); static void ena_update_host_info(struct ena_admin_host_info *, if_t); static void ena_update_hwassist(struct ena_adapter *); static int ena_setup_ifnet(device_t, struct ena_adapter *, struct ena_com_dev_get_features_ctx *); static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *); -static int ena_xmit_mbuf(struct ena_ring *, struct mbuf *); +static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **); static void ena_start_xmit(struct ena_ring *); static int ena_mq_start(if_t, struct mbuf *); static void ena_deferred_mq_start(void *, int); static void ena_qflush(if_t); static int ena_calc_io_queue_num(struct ena_adapter *, struct ena_com_dev_get_features_ctx *); static int ena_calc_queue_size(struct ena_adapter *, uint16_t *, uint16_t *, struct ena_com_dev_get_features_ctx *); static int ena_rss_init_default(struct ena_adapter *); static void ena_rss_init_default_deferred(void *); static void ena_config_host_info(struct ena_com_dev *); static int ena_attach(device_t); static int ena_detach(device_t); static int ena_device_init(struct ena_adapter *, device_t, struct ena_com_dev_get_features_ctx *, int *); static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *, int); static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *); static void unimplemented_aenq_handler(void *, struct ena_admin_aenq_entry *); static void ena_timer_service(void *); static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" 
DRV_MODULE_VERSION; static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD, 0, "ENA driver parameters"); /* * Tuneable number of buffers in the buf-ring (drbr) */ static int ena_buf_ring_size = 4096; SYSCTL_INT(_hw_ena, OID_AUTO, buf_ring_size, CTLFLAG_RWTUN, &ena_buf_ring_size, 0, "Size of the bufring"); static ena_vendor_info_t ena_vendor_info_array[] = { { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0}, { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_PF, 0}, { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0}, { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_VF, 0}, /* Last entry */ { 0, 0, 0 } }; /* * Contains pointers to event handlers, e.g. link state change. */ static struct ena_aenq_handlers aenq_handlers; void ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error) return; *(bus_addr_t *) arg = segs[0].ds_addr; return; } int ena_dma_alloc(device_t dmadev, bus_size_t size, ena_mem_handle_t *dma, int mapflags) { struct ena_adapter* adapter = device_get_softc(dmadev); uint32_t maxsize = ((size - 1)/PAGE_SIZE + 1) * PAGE_SIZE; uint64_t dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width); int error; if (dma_space_addr == 0) dma_space_addr = BUS_SPACE_MAXADDR; error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */ 8, 0, /* alignment, bounds */ dma_space_addr, /* lowaddr */ dma_space_addr, /* highaddr */ NULL, NULL, /* filter, filterarg */ maxsize, /* maxsize */ 1, /* nsegments */ maxsize, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->tag); if (error) { device_printf(dmadev, "%s: bus_dma_tag_create failed: %d\n", __func__, error); goto fail_tag; } error = bus_dmamem_alloc(dma->tag, (void**) &dma->vaddr, BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map); if (error) { device_printf(dmadev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, error); goto fail_map_create; } dma->paddr = 0; error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr, size, ena_dmamap_callback, &dma->paddr, mapflags); if (error || dma->paddr == 0) { device_printf(dmadev, "%s: bus_dmamap_load failed: %d\n", __func__, error); goto fail_map_load; } return (0); fail_map_load: bus_dmamap_unload(dma->tag, dma->map); fail_map_create: bus_dmamem_free(dma->tag, dma->vaddr, dma->map); bus_dma_tag_destroy(dma->tag); fail_tag: dma->tag = NULL; return (error); } static int ena_allocate_pci_resources(struct ena_adapter* adapter) { device_t pdev = adapter->pdev; int rid; rid = PCIR_BAR(ENA_REG_BAR); adapter->memory = NULL; adapter->registers = bus_alloc_resource_any(pdev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->registers == NULL) { device_printf(pdev, "Unable to allocate bus resource: " "registers\n"); return (ENXIO); } return (0); } static void ena_free_pci_resources(struct ena_adapter *adapter) { device_t pdev = adapter->pdev; if (adapter->memory != NULL) { bus_release_resource(pdev, SYS_RES_MEMORY, PCIR_BAR(ENA_MEM_BAR), adapter->memory); } if (adapter->registers != NULL) { bus_release_resource(pdev, SYS_RES_MEMORY, PCIR_BAR(ENA_REG_BAR), adapter->registers); } return; } static int ena_probe(device_t dev) { ena_vendor_info_t *ent; char adapter_name[60]; uint16_t pci_vendor_id = 0; uint16_t pci_device_id = 0; pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); ent = ena_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id)) { ena_trace(ENA_DBG, "vendor=%x device=%x ", pci_vendor_id, pci_device_id); sprintf(adapter_name, DEVICE_DESC);
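/* device_set_desc_copy(9) duplicates the string, so the on-stack adapter_name buffer need not outlive probe. */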
device_set_desc_copy(dev, adapter_name); return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } static int ena_change_mtu(if_t ifp, int new_mtu) { struct ena_adapter *adapter = if_getsoftc(ifp); struct ena_com_dev_get_features_ctx get_feat_ctx; int rc, old_mtu, max_frame; rc = ena_com_get_dev_attr_feat(adapter->ena_dev, &get_feat_ctx); if (rc) { device_printf(adapter->pdev, "Cannot get attribute for ena device\n"); return (ENXIO); } /* Save old MTU in case of fail */ old_mtu = if_getmtu(ifp); /* Change MTU and calculate max frame */ if_setmtu(ifp, new_mtu); max_frame = ETHER_MAX_FRAME(ifp, ETHERTYPE_VLAN, 1); if ((new_mtu < ENA_MIN_FRAME_LEN) || (new_mtu > get_feat_ctx.dev_attr.max_mtu) || (max_frame > ENA_MAX_FRAME_LEN)) { device_printf(adapter->pdev, "Invalid MTU setting. " "new_mtu: %d\n", new_mtu); goto error; } rc = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); if (rc != 0) goto error; return (0); error: if_setmtu(ifp, old_mtu); return (EINVAL); } static inline void ena_alloc_counters(counter_u64_t *begin, int size) { counter_u64_t *end = (counter_u64_t *)((char *)begin + size); for (; begin < end; ++begin) *begin = counter_u64_alloc(M_WAITOK); } static inline void ena_free_counters(counter_u64_t *begin, int size) { counter_u64_t *end = (counter_u64_t *)((char *)begin + size); for (; begin < end; ++begin) counter_u64_free(*begin); } static inline void ena_reset_counters(counter_u64_t *begin, int size) { counter_u64_t *end = (counter_u64_t *)((char *)begin + size); for (; begin < end; ++begin) counter_u64_zero(*begin); } static void ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring, uint16_t qid) { ring->qid = qid; ring->adapter = adapter; ring->ena_dev = adapter->ena_dev; } static int ena_init_io_rings(struct ena_adapter *adapter) { struct ena_com_dev *ena_dev; struct ena_ring *txr, *rxr; struct ena_que *que; int i; int rc; ena_dev = adapter->ena_dev; for (i = 0; i < adapter->num_queues; i++) { txr = &adapter->tx_ring[i]; rxr = &adapter->rx_ring[i]; /* TX/RX common ring state */ ena_init_io_rings_common(adapter, txr, i); ena_init_io_rings_common(adapter, rxr, i); /* TX specific ring state */ txr->ring_size = adapter->tx_ring_size; txr->tx_max_header_size = ena_dev->tx_max_header_size; txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; txr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); /* Allocate a buf ring */ txr->br = buf_ring_alloc(ena_buf_ring_size, M_DEVBUF, M_WAITOK, &txr->ring_mtx); if (txr->br == NULL) { device_printf(adapter->pdev, "Error while setting up bufring\n"); rc = ENOMEM; goto err_bufr_free; } /* Alloc TX statistics. */ ena_alloc_counters((counter_u64_t *)&txr->tx_stats, sizeof(txr->tx_stats)); /* RX specific ring state */ rxr->ring_size = adapter->rx_ring_size; rxr->rx_small_copy_len = adapter->small_copy_len; rxr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); /* Alloc RX statistics. 
*/ ena_alloc_counters((counter_u64_t *)&rxr->rx_stats, sizeof(rxr->rx_stats)); /* Initialize locks */ snprintf(txr->mtx_name, nitems(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(adapter->pdev), i); snprintf(rxr->mtx_name, nitems(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(adapter->pdev), i); mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF); mtx_init(&rxr->ring_mtx, rxr->mtx_name, NULL, MTX_DEF); que = &adapter->que[i]; que->adapter = adapter; que->id = i; que->tx_ring = txr; que->rx_ring = rxr; txr->que = que; rxr->que = que; } return 0; err_bufr_free: while (i--) ena_free_io_ring_resources(adapter, i); return (rc); } static void ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid) { struct ena_ring *txr = &adapter->tx_ring[qid]; struct ena_ring *rxr = &adapter->rx_ring[qid]; ena_free_counters((counter_u64_t *)&txr->tx_stats, sizeof(txr->tx_stats)); ena_free_counters((counter_u64_t *)&rxr->rx_stats, sizeof(rxr->rx_stats)); mtx_destroy(&txr->ring_mtx); mtx_destroy(&rxr->ring_mtx); drbr_free(txr->br, M_DEVBUF); } static void ena_free_all_io_rings_resources(struct ena_adapter *adapter) { int i; for (i = 0; i < adapter->num_queues; i++) ena_free_io_ring_resources(adapter, i); } static int ena_setup_tx_dma_tag(struct ena_adapter *adapter) { int ret; /* Create DMA tag for Tx buffers */ ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), 1, 0, /* alignment, bounds */ ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr */ ENA_DMA_BIT_MASK(adapter->dma_width), /* highaddr */ NULL, NULL, /* filter, filterarg */ ENA_TSO_MAXSIZE, /* maxsize */ adapter->max_tx_sgl_size, /* nsegments */ ENA_TSO_MAXSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &adapter->tx_buf_tag); if (ret != 0) device_printf(adapter->pdev, "Unable to create Tx DMA tag\n"); return (ret); } static int ena_free_tx_dma_tag(struct ena_adapter *adapter) { int ret; ret = bus_dma_tag_destroy(adapter->tx_buf_tag); if (ret == 0) adapter->tx_buf_tag = NULL; return (ret); } static int ena_setup_rx_dma_tag(struct ena_adapter *adapter) { int ret; /* Create DMA tag for Rx buffers*/ ret = bus_dma_tag_create(bus_get_dma_tag(adapter->pdev), /* parent */ 1, 0, /* alignment, bounds */ ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr */ ENA_DMA_BIT_MASK(adapter->dma_width), /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM16BYTES, /* maxsize */ 1, /* nsegments */ MJUM16BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &adapter->rx_buf_tag); if (ret != 0) device_printf(adapter->pdev, "Unable to create Rx DMA tag\n"); return (ret); } static int ena_free_rx_dma_tag(struct ena_adapter *adapter) { int ret; ret = bus_dma_tag_destroy(adapter->rx_buf_tag); if (ret == 0) adapter->rx_buf_tag = NULL; return (ret); } /** * ena_setup_tx_resources - allocate Tx resources (Descriptors) * @adapter: network interface device structure * @qid: queue index * * Returns 0 on success, otherwise on failure. 
**/ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) { struct ena_que *que = &adapter->que[qid]; struct ena_ring *tx_ring = que->tx_ring; int size, i, err; #ifdef RSS cpuset_t cpu_mask; #endif size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size; tx_ring->tx_buffer_info = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); if (!tx_ring->tx_buffer_info) goto err_tx_buffer_info; size = sizeof(uint16_t) * tx_ring->ring_size; tx_ring->free_tx_ids = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); if (!tx_ring->free_tx_ids) goto err_tx_reqs; /* Req id stack for TX OOO completions */ for (i = 0; i < tx_ring->ring_size; i++) tx_ring->free_tx_ids[i] = i; /* Reset TX statistics. */ ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats, sizeof(tx_ring->tx_stats)); tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; /* Make sure that drbr is empty */ drbr_flush(adapter->ifp, tx_ring->br); /* ... and create the buffer DMA maps */ for (i = 0; i < tx_ring->ring_size; i++) { err = bus_dmamap_create(adapter->tx_buf_tag, 0, &tx_ring->tx_buffer_info[i].map); if (err != 0) { device_printf(adapter->pdev, "Unable to create Tx DMA map for buffer %d\n", i); goto err_tx_map; } } /* Allocate taskqueues */ TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring); tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT, taskqueue_thread_enqueue, &tx_ring->enqueue_tq); if (tx_ring->enqueue_tq == NULL) { device_printf(adapter->pdev, "Unable to create taskqueue for enqueue task\n"); i = tx_ring->ring_size; goto err_tx_map; } /* RSS set cpu for thread */ #ifdef RSS CPU_SETOF(que->cpu, &cpu_mask); taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET, &cpu_mask, "%s tx_ring enq (bucket %d)", device_get_nameunit(adapter->pdev), que->cpu); #else /* RSS */ taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET, "%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu); #endif /* RSS */ return (0); err_tx_map: while (i--) { bus_dmamap_destroy(adapter->tx_buf_tag, tx_ring->tx_buffer_info[i].map); } ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->free_tx_ids); err_tx_reqs: ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->tx_buffer_info); err_tx_buffer_info: return (ENOMEM); } /** * ena_free_tx_resources - Free Tx Resources per Queue * @adapter: network interface device structure * @qid: queue index * * Free all transmit software resources **/ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) { struct ena_ring *tx_ring = &adapter->tx_ring[qid]; while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task, NULL)) taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task); taskqueue_free(tx_ring->enqueue_tq); /* Flush buffer ring, */ drbr_flush(adapter->ifp, tx_ring->br); /* Free buffer DMA maps, */ + ENA_RING_MTX_LOCK(tx_ring); for (int i = 0; i < tx_ring->ring_size; i++) { m_freem(tx_ring->tx_buffer_info[i].mbuf); tx_ring->tx_buffer_info[i].mbuf = NULL; bus_dmamap_unload(adapter->tx_buf_tag, tx_ring->tx_buffer_info[i].map); bus_dmamap_destroy(adapter->tx_buf_tag, tx_ring->tx_buffer_info[i].map); } + ENA_RING_MTX_UNLOCK(tx_ring); /* And free allocated memory. */ ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->free_tx_ids); tx_ring->free_tx_ids = NULL; } /** * ena_setup_all_tx_resources - allocate all queues Tx resources * @adapter: network interface device structure * * Returns 0 on success, otherwise on failure. 
**/ static int ena_setup_all_tx_resources(struct ena_adapter *adapter) { int i, rc; for (i = 0; i < adapter->num_queues; i++) { rc = ena_setup_tx_resources(adapter, i); if (!rc) continue; device_printf(adapter->pdev, "Allocation for Tx Queue %u failed\n", i); goto err_setup_tx; } return (0); err_setup_tx: /* Rewind the index freeing the rings as we go */ while (i--) ena_free_tx_resources(adapter, i); return (rc); } /** * ena_free_all_tx_resources - Free Tx Resources for All Queues * @adapter: network interface device structure * * Free all transmit software resources **/ static void ena_free_all_tx_resources(struct ena_adapter *adapter) { int i; for (i = 0; i < adapter->num_queues; i++) ena_free_tx_resources(adapter, i); return; } /** * ena_setup_rx_resources - allocate Rx resources (Descriptors) * @adapter: network interface device structure * @qid: queue index * * Returns 0 on success, otherwise on failure. **/ static int ena_setup_rx_resources(struct ena_adapter *adapter, unsigned int qid) { struct ena_que *que = &adapter->que[qid]; struct ena_ring *rx_ring = que->rx_ring; int size, err, i; #ifdef RSS cpuset_t cpu_mask; #endif size = sizeof(struct ena_rx_buffer) * rx_ring->ring_size; /* * Alloc extra element so in rx path * we can always prefetch rx_info + 1 */ size += sizeof(struct ena_rx_buffer); rx_ring->rx_buffer_info = ENA_MEM_ALLOC(adapter->ena_dev->dmadev, size); if (!rx_ring->rx_buffer_info) return (ENOMEM); /* Reset RX statistics. */ ena_reset_counters((counter_u64_t *)&rx_ring->rx_stats, sizeof(rx_ring->rx_stats)); rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; /* ... and create the buffer DMA maps */ for (i = 0; i < rx_ring->ring_size; i++) { err = bus_dmamap_create(adapter->rx_buf_tag, 0, &(rx_ring->rx_buffer_info[i].map)); if (err != 0) { device_printf(adapter->pdev, "Unable to create Rx DMA map for buffer %d\n", i); goto err_rx_dma; } } /* Create LRO for the ring */ if (adapter->ifp->if_capenable & IFCAP_LRO) { int err = tcp_lro_init(&rx_ring->lro); if (err) { device_printf(adapter->pdev, "LRO[%d] Initialization failed!\n", qid); } else { ena_trace(ENA_INFO, "RX Soft LRO[%d] Initialized\n", qid); rx_ring->lro.ifp = adapter->ifp; } } return (0); err_rx_dma: while (i--) { bus_dmamap_destroy(adapter->rx_buf_tag, rx_ring->rx_buffer_info[i].map); } ENA_MEM_FREE(adapter->ena_dev->dmadev, rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; ena_trace(ENA_ALERT, "RX resource allocation fail"); return (ENOMEM); } /** * ena_free_rx_resources - Free Rx Resources * @adapter: network interface device structure * @qid: queue index * * Free all receive software resources **/ static void ena_free_rx_resources(struct ena_adapter *adapter, unsigned int qid) { struct ena_ring *rx_ring = &adapter->rx_ring[qid]; ena_trace(ENA_INFO, "%s qid %d\n", __func__, qid); /* Free buffer DMA maps, */ for (int i = 0; i < rx_ring->ring_size; i++) { m_freem(rx_ring->rx_buffer_info[i].mbuf); rx_ring->rx_buffer_info[i].mbuf = NULL; bus_dmamap_unload(adapter->rx_buf_tag, rx_ring->rx_buffer_info[i].map); bus_dmamap_destroy(adapter->rx_buf_tag, rx_ring->rx_buffer_info[i].map); } /* free LRO resources, */ tcp_lro_free(&rx_ring->lro); /* free allocated memory */ ENA_MEM_FREE(adapter->ena_dev->dmadev, rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; return; } /** * ena_setup_all_rx_resources - allocate all queues Rx resources * @adapter: network interface device structure * * Returns 0 on success, otherwise on failure. 
**/ static int ena_setup_all_rx_resources(struct ena_adapter *adapter) { int i, rc = 0; for (i = 0; i < adapter->num_queues; i++) { rc = ena_setup_rx_resources(adapter, i); if (!rc) continue; device_printf(adapter->pdev, "Allocation for Rx Queue %u failed\n", i); goto err_setup_rx; } return (0); err_setup_rx: /* rewind the index freeing the rings as we go */ while (i--) ena_free_rx_resources(adapter, i); return (rc); } /** * ena_free_all_rx_resources - Free Rx resources for all queues * @adapter: network interface device structure * * Free all receive software resources **/ static void ena_free_all_rx_resources(struct ena_adapter *adapter) { int i; for (i = 0; i < adapter->num_queues; i++) ena_free_rx_resources(adapter, i); return; } static inline int ena_alloc_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info) { struct ena_com_buf *ena_buf; bus_dma_segment_t segs[1]; int nsegs, error; /* if previously allocated frag is not used */ if (rx_info->mbuf != NULL) return (0); ENA_RING_MTX_LOCK(rx_ring); /* Get mbuf using UMA allocator */ rx_info->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES); ENA_RING_MTX_UNLOCK(rx_ring); if (!rx_info->mbuf) { counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1); return (ENOMEM); } /* Set mbuf length */ rx_info->mbuf->m_pkthdr.len = rx_info->mbuf->m_len = MJUM16BYTES; /* Map packets for DMA */ ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH, "Using tag %p for buffers' DMA mapping, mbuf %p len: %d", adapter->rx_buf_tag, rx_info->mbuf, rx_info->mbuf->m_len); error = bus_dmamap_load_mbuf_sg(adapter->rx_buf_tag, rx_info->map, rx_info->mbuf, segs, &nsegs, BUS_DMA_NOWAIT); if (error || (nsegs != 1)) { device_printf(adapter->pdev, "failed to map mbuf, error: %d, " "nsegs: %d\n", error, nsegs); counter_u64_add(rx_ring->rx_stats.dma_mapping_err, 1); goto exit; } bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map, BUS_DMASYNC_PREREAD); ena_buf = &rx_info->ena_buf; ena_buf->paddr = segs[0].ds_addr; ena_buf->len = MJUM16BYTES; ena_trace(ENA_DBG | ENA_RSC | ENA_RXPTH, "ALLOC RX BUF: mbuf %p, rx_info %p, len %d, paddr %#jx\n", rx_info->mbuf, rx_info, ena_buf->len, (uintmax_t)ena_buf->paddr); return (0); exit: m_freem(rx_info->mbuf); rx_info->mbuf = NULL; return (EFAULT); } static void ena_free_rx_mbuf(struct ena_adapter *adapter, struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info) { if (!rx_info->mbuf) return; bus_dmamap_unload(adapter->rx_buf_tag, rx_info->map); m_freem(rx_info->mbuf); rx_info->mbuf = NULL; return; } /** * ena_refill_rx_bufs - Refills ring with descriptors * @rx_ring: the ring which we want to feed with free descriptors * @num: number of descriptors to refill * Refills the ring with newly allocated DMA-mapped mbufs for receiving **/ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, uint32_t num) { struct ena_adapter *adapter = rx_ring->adapter; uint16_t next_to_use; uint32_t i; int rc; ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC, "refill qid: %d", rx_ring->qid); next_to_use = rx_ring->next_to_use; for (i = 0; i < num; i++) { ena_trace(ENA_DBG | ENA_RXPTH | ENA_RSC, "RX buffer - next to use: %d", next_to_use); struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[next_to_use]; rc = ena_alloc_rx_mbuf(adapter, rx_ring, rx_info); if (rc != 0) { /* ena_alloc_rx_mbuf() returns positive errno values, so test for != 0 rather than < 0 */ device_printf(adapter->pdev, "failed to alloc buffer for rx queue\n"); break; } rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq, &rx_info->ena_buf, next_to_use); if (unlikely(rc)) { device_printf(adapter->pdev, "failed to add buffer for rx queue %d\n",
rx_ring->qid); break; } next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use, rx_ring->ring_size); } if (i < num) { counter_u64_add(rx_ring->rx_stats.refil_partial, 1); device_printf(adapter->pdev, "refilled rx queue %d with %d pages only\n", rx_ring->qid, i); } if (i != 0) { wmb(); ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); } rx_ring->next_to_use = next_to_use; return (i); } static void ena_free_rx_bufs(struct ena_adapter *adapter, unsigned int qid) { struct ena_ring *rx_ring = &adapter->rx_ring[qid]; unsigned int i; for (i = 0; i < rx_ring->ring_size; i++) { struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i]; if (rx_info->mbuf) ena_free_rx_mbuf(adapter, rx_ring, rx_info); } return; } /** * ena_refill_all_rx_bufs - allocate all queues Rx buffers * @adapter: network interface device structure * */ static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) { struct ena_ring *rx_ring; int i, rc, bufs_num; for (i = 0; i < adapter->num_queues; i++) { rx_ring = &adapter->rx_ring[i]; bufs_num = rx_ring->ring_size - 1; rc = ena_refill_rx_bufs(rx_ring, bufs_num); if (unlikely(rc != bufs_num)) device_printf(adapter->pdev, "refilling Queue %d failed. allocated %d buffers" " from: %d\n", i, rc, bufs_num); } } static void ena_free_all_rx_bufs(struct ena_adapter *adapter) { int i; for (i = 0; i < adapter->num_queues; i++) ena_free_rx_bufs(adapter, i); return; } /** * ena_free_tx_bufs - Free Tx Buffers per Queue * @adapter: network interface device structure * @qid: queue index **/ static void ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid) { struct ena_ring *tx_ring = &adapter->tx_ring[qid]; + ENA_RING_MTX_LOCK(tx_ring); for (int i = 0; i < tx_ring->ring_size; i++) { struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; if (tx_info->mbuf == NULL) continue; ena_trace(ENA_DBG | ENA_TXPTH | ENA_RSC, "free uncompleted Tx mbufs qid[%d] idx: 0x%x", qid, i); bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map); m_free(tx_info->mbuf); tx_info->mbuf = NULL; } + ENA_RING_MTX_UNLOCK(tx_ring); return; } static void ena_free_all_tx_bufs(struct ena_adapter *adapter) { for (int i = 0; i < adapter->num_queues; i++) ena_free_tx_bufs(adapter, i); return; } static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) { uint16_t ena_qid; int i; for (i = 0; i < adapter->num_queues; i++) { ena_qid = ENA_IO_TXQ_IDX(i); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); } } static void ena_destroy_all_rx_queues(struct ena_adapter *adapter) { uint16_t ena_qid; int i; for (i = 0; i < adapter->num_queues; i++) { ena_qid = ENA_IO_RXQ_IDX(i); ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); } } static void ena_destroy_all_io_queues(struct ena_adapter *adapter) { ena_destroy_all_tx_queues(adapter); ena_destroy_all_rx_queues(adapter); } static int validate_tx_req_id(struct ena_ring *tx_ring, uint16_t req_id) { struct ena_tx_buffer *tx_info = NULL; if (likely(req_id < tx_ring->ring_size)) { tx_info = &tx_ring->tx_buffer_info[req_id]; if (tx_info->mbuf) return 0; } counter_u64_add(tx_ring->tx_stats.bad_req_id, 1); return (EFAULT); } static int ena_create_io_queues(struct ena_adapter *adapter) { struct ena_com_dev *ena_dev = adapter->ena_dev; struct ena_com_create_io_ctx ctx; struct ena_ring *ring; uint16_t ena_qid; uint32_t msix_vector; int rc, i; /* Create TX queues */ for (i = 0; i < adapter->num_queues; i++) { msix_vector = ENA_IO_IRQ_IDX(i); ena_qid = ENA_IO_TXQ_IDX(i); ctx.mem_queue_type = ena_dev->tx_mem_queue_type; ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; ctx.queue_size = 
adapter->tx_ring_size; ctx.msix_vector = msix_vector; ctx.qid = ena_qid; rc = ena_com_create_io_queue(ena_dev, &ctx); if (rc) { device_printf(adapter->pdev, "Failed to create io TX queue #%d rc: %d\n", i, rc); goto err_tx; } ring = &adapter->tx_ring[i]; rc = ena_com_get_io_handlers(ena_dev, ena_qid, &ring->ena_com_io_sq, &ring->ena_com_io_cq); if (rc) { device_printf(adapter->pdev, "Failed to get TX queue handlers. TX queue num" " %d rc: %d\n", i, rc); ena_com_destroy_io_queue(ena_dev, ena_qid); goto err_tx; } } /* Create RX queues */ for (i = 0; i < adapter->num_queues; i++) { msix_vector = ENA_IO_IRQ_IDX(i); ena_qid = ENA_IO_RXQ_IDX(i); ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; ctx.queue_size = adapter->rx_ring_size; ctx.msix_vector = msix_vector; ctx.qid = ena_qid; rc = ena_com_create_io_queue(ena_dev, &ctx); if (rc) { device_printf(adapter->pdev, "Failed to create io RX queue[%d] rc: %d\n", i, rc); goto err_rx; } ring = &adapter->rx_ring[i]; rc = ena_com_get_io_handlers(ena_dev, ena_qid, &ring->ena_com_io_sq, &ring->ena_com_io_cq); if (rc) { device_printf(adapter->pdev, "Failed to get RX queue handlers. RX queue num" " %d rc: %d\n", i, rc); ena_com_destroy_io_queue(ena_dev, ena_qid); goto err_rx; } } return (0); err_rx: while (i--) ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); i = adapter->num_queues; err_tx: while (i--) ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); return (ENXIO); } /** * ena_tx_cleanup - clear sent packets and corresponding descriptors * @tx_ring: ring for which we want to clean packets * * Once packets are sent, we ask the device in a loop for descriptors that are * no longer in use. We find the related mbuf chain in a map (an index into an * array) and free it, then update the ring state. * This is performed in an "endless" loop, updating the ring pointers every * TX_COMMIT descriptors. The first check for free descriptors is done before * the actual loop, then repeated at the end of the loop.
**/ static int ena_tx_cleanup(struct ena_ring *tx_ring) { struct ena_adapter *adapter; struct ena_com_io_cq* io_cq; uint16_t next_to_clean; uint16_t req_id; uint16_t ena_qid; unsigned int total_done = 0; int rc; int commit = TX_COMMIT; int budget = TX_BUDGET; int work_done; adapter = tx_ring->que->adapter; ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; next_to_clean = tx_ring->next_to_clean; do { struct ena_tx_buffer *tx_info; struct mbuf *mbuf; rc = ena_com_tx_comp_req_id_get(io_cq, &req_id); if (rc != 0) break; rc = validate_tx_req_id(tx_ring, req_id); if (rc) break; tx_info = &tx_ring->tx_buffer_info[req_id]; mbuf = tx_info->mbuf; tx_info->mbuf = NULL; bintime_clear(&tx_info->timestamp); if (tx_info->num_of_bufs != 0) { /* Map is no longer required */ bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map); } m_freem(mbuf); total_done += tx_info->tx_descs; tx_ring->free_tx_ids[next_to_clean] = req_id; next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, tx_ring->ring_size); if (--commit == 0) { commit = TX_COMMIT; /* update ring state every TX_COMMIT descriptor */ tx_ring->next_to_clean = next_to_clean; ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid], total_done); ena_com_update_dev_comp_head(io_cq); total_done = 0; } } while (--budget); work_done = TX_BUDGET - budget; /* If there is still something to commit update ring state */ if (commit != TX_COMMIT) { tx_ring->next_to_clean = next_to_clean; ena_com_comp_ack(&adapter->ena_dev->io_sq_queues[ena_qid], total_done); ena_com_update_dev_comp_head(io_cq); } taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); return (work_done); } static void ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, struct mbuf *mbuf) { struct ena_adapter *adapter = rx_ring->adapter; if (adapter->rss_support) { mbuf->m_pkthdr.flowid = ena_rx_ctx->hash; if (ena_rx_ctx->frag && ena_rx_ctx->l3_proto != ENA_ETH_IO_L4_PROTO_UNKNOWN) { M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); return; } switch (ena_rx_ctx->l3_proto) { case ENA_ETH_IO_L3_PROTO_IPV4: switch (ena_rx_ctx->l4_proto) { case ENA_ETH_IO_L4_PROTO_TCP: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4); break; case ENA_ETH_IO_L4_PROTO_UDP: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4); break; default: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4); } break; case ENA_ETH_IO_L3_PROTO_IPV6: switch (ena_rx_ctx->l4_proto) { case ENA_ETH_IO_L4_PROTO_TCP: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6); break; case ENA_ETH_IO_L4_PROTO_UDP: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6); break; default: M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6); } break; case ENA_ETH_IO_L3_PROTO_UNKNOWN: M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); break; default: M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH); } } else { mbuf->m_pkthdr.flowid = rx_ring->qid; M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE); } } /** * ena_rx_mbuf - assemble mbuf from descriptors * @rx_ring: ring for which we want to clean packets * @ena_bufs: buffer info * @ena_rx_ctx: metadata for this packet(s) * @next_to_clean: ring pointer * **/ static struct mbuf* ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, struct ena_com_rx_ctx *ena_rx_ctx, uint16_t *next_to_clean) { struct mbuf *mbuf; struct ena_rx_buffer *rx_info; struct ena_adapter *adapter; unsigned int len, buf = 0; unsigned int descs = ena_rx_ctx->descs; adapter = rx_ring->adapter; rx_info = &rx_ring->rx_buffer_info[*next_to_clean]; ENA_ASSERT(rx_info->mbuf, "Invalid alloc frag buffer\n"); len = ena_bufs[0].len; 
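/* len holds only the first descriptor's length; for multi-descriptor packets the remaining fragments are appended to this mbuf in the while (--descs) loop below. */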
ena_trace(ENA_DBG | ENA_RXPTH, "rx_info %p, mbuf %p, paddr %jx", rx_info, rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr); mbuf = rx_info->mbuf; mbuf->m_flags |= M_PKTHDR; mbuf->m_pkthdr.len = len; mbuf->m_len = len; mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp; /* Fill mbuf with hash key and its interpretation for optimization */ ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf); ena_trace(ENA_DBG | ENA_RXPTH, "rx mbuf 0x%p, flags=0x%x, len: %d", mbuf, mbuf->m_flags, mbuf->m_pkthdr.len); /* DMA address is not needed anymore, unmap it */ bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); rx_info->mbuf = NULL; *next_to_clean = ENA_RX_RING_IDX_NEXT(*next_to_clean, rx_ring->ring_size); /* * While we have more than one descriptor for one received packet, append * the other mbufs to the main one */ while (--descs) { rx_info = &rx_ring->rx_buffer_info[*next_to_clean]; len = ena_bufs[++buf].len; if (!m_append(mbuf, len, rx_info->mbuf->m_data)) { counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1); ena_trace(ENA_WARNING, "Failed to append Rx mbuf %p", mbuf); } /* Free already appended mbuf, it won't be useful anymore */ bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map); m_freem(rx_info->mbuf); rx_info->mbuf = NULL; *next_to_clean = ENA_RX_RING_IDX_NEXT(*next_to_clean, rx_ring->ring_size); } return (mbuf); } /** * ena_rx_checksum - indicate in mbuf if hw reported a good cksum **/ static inline void ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, struct mbuf *mbuf) { /* if IP and error */ if ((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && (ena_rx_ctx->l3_csum_err)) { /* ipv4 checksum error */ mbuf->m_pkthdr.csum_flags = 0; counter_u64_add(rx_ring->rx_stats.bad_csum, 1); return; } /* if TCP/UDP */ if ((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) { if (ena_rx_ctx->l4_csum_err) { /* TCP/UDP checksum error */ mbuf->m_pkthdr.csum_flags = 0; counter_u64_add(rx_ring->rx_stats.bad_csum, 1); } else { mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID; } } return; } /** * ena_rx_cleanup - handle rx irq * @rx_ring: ring for which the irq is being handled **/ static int ena_rx_cleanup(struct ena_ring *rx_ring) { struct ena_adapter *adapter; struct mbuf *mbuf; struct ena_com_rx_ctx ena_rx_ctx; struct ena_com_io_cq* io_cq; struct ena_com_io_sq* io_sq; /* struct ena_eth_io_intr_reg intr_reg; */ if_t ifp; uint16_t ena_qid; uint16_t next_to_clean; uint32_t refill_required; uint32_t refill_threshold; uint32_t do_if_input = 0; unsigned int qid; int rc; int budget = RX_BUDGET; adapter = rx_ring->que->adapter; ifp = adapter->ifp; qid = rx_ring->que->id; ena_qid = ENA_IO_RXQ_IDX(qid); io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; next_to_clean = rx_ring->next_to_clean; do { ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size; ena_rx_ctx.descs = 0; rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx); if (unlikely(rc)) goto error; if (unlikely(ena_rx_ctx.descs == 0)) break; /* Receive mbuf from the ring */ mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs, &ena_rx_ctx, &next_to_clean); /* Exit if we failed to retrieve a buffer */ if (unlikely(!mbuf)) { next_to_clean = ENA_RX_RING_IDX_ADD(next_to_clean, ena_rx_ctx.descs, rx_ring->ring_size); break; } ena_trace(ENA_DBG | ENA_RXPTH, "Rx: %d bytes", mbuf->m_pkthdr.len); if ((ifp->if_capenable & IFCAP_RXCSUM) || (ifp->if_capenable & IFCAP_RXCSUM_IPV6)) {
ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf); } counter_u64_add(rx_ring->rx_stats.bytes, mbuf->m_pkthdr.len); /* * LRO is only for IP/TCP packets and TCP checksum of the packet * should be computed by hardware. */ do_if_input = 1; if ((ifp->if_capenable & IFCAP_LRO) && (mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) && ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP) { /* * Send to the stack if: * - LRO not enabled, or * - no LRO resources, or * - lro enqueue fails */ if (rx_ring->lro.lro_cnt != 0 && tcp_lro_rx(&rx_ring->lro, mbuf, 0) == 0) do_if_input = 0; } if (do_if_input) { ena_trace(ENA_DBG | ENA_RXPTH, "calling if_input() with mbuf %p", mbuf); (*ifp->if_input)(ifp, mbuf); } counter_u64_add(rx_ring->rx_stats.cnt, 1); } while (--budget); rx_ring->next_to_clean = next_to_clean; refill_required = ena_com_sq_empty_space(io_sq); refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DEVIDER; if (refill_required > refill_threshold) { ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); ena_refill_rx_bufs(rx_ring, refill_required); } tcp_lro_flush_all(&rx_ring->lro); return (RX_BUDGET - budget); error: counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1); return (RX_BUDGET - budget); } /********************************************************************* * * MSIX & Interrupt Service routine * **********************************************************************/ /** * ena_intr_msix_mgmnt - MSIX Interrupt Handler for admin/async queue * @arg: pointer to the ena_adapter **/ static void ena_intr_msix_mgmnt(void *arg) { struct ena_adapter *adapter = (struct ena_adapter *)arg; ena_com_admin_q_comp_intr_handler(adapter->ena_dev); if (likely(adapter->running)) ena_com_aenq_intr_handler(adapter->ena_dev, arg); } /** * ena_handle_msix - MSIX Interrupt Handler for Tx/Rx * @arg: pointer to the ena_que owning the interrupt **/ static void ena_handle_msix(void *arg) { struct ena_que *que = arg; struct ena_adapter *adapter = que->adapter; if_t ifp = adapter->ifp; struct ena_ring *tx_ring; struct ena_ring *rx_ring; struct ena_com_io_cq* io_cq; struct ena_eth_io_intr_reg intr_reg; int qid, ena_qid; int txc, rxc, i; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; ena_trace(ENA_DBG, "MSI-X TX/RX routine"); tx_ring = que->tx_ring; rx_ring = que->rx_ring; qid = que->id; ena_qid = ENA_IO_TXQ_IDX(qid); io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; for (i = 0; i < CLEAN_BUDGET; ++i) { rxc = ena_rx_cleanup(rx_ring); /* Protection from calling ena_tx_cleanup from ena_start_xmit */ ENA_RING_MTX_LOCK(tx_ring); txc = ena_tx_cleanup(tx_ring); ENA_RING_MTX_UNLOCK(tx_ring); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; if (txc != TX_BUDGET && rxc != RX_BUDGET) break; } /* Signal that work is done and unmask interrupt */ ena_com_update_intr_reg(&intr_reg, RX_IRQ_INTERVAL, TX_IRQ_INTERVAL, true); ena_com_unmask_intr(io_cq, &intr_reg); } static int ena_enable_msix(struct ena_adapter *adapter) { device_t dev = adapter->pdev; int i, msix_vecs, rc = 0; /* Reserve the max msix vectors we might need */ msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_queues); adapter->msix_entries = ENA_MEM_ALLOC(adapter->ena_dev->dmadev, msix_vecs * sizeof(struct msix_entry)); if (!adapter->msix_entries) { device_printf(dev, "Failed to allocate msix_entries, vectors %d\n", msix_vecs); rc = ENOMEM; goto error; } device_printf(dev, "Allocated msix_entries, vectors (cnt: %d)\n", msix_vecs); for (i = 0; i < msix_vecs; i++) { adapter->msix_entries[i].entry = i; /* Vectors must start from 1 */ adapter->msix_entries[i].vector = i + 1; } rc = pci_alloc_msix(dev, &msix_vecs);
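/*
 * A note on the call above: per pci(9), pci_alloc_msix() may grant fewer
 * messages than requested and hands the granted count back through
 * msix_vecs, which is why adapter->msix_vecs is taken from this value
 * below rather than from the original request. ENA_MAX_MSIX_VEC(n)
 * evaluates to 1 + n: one vector for the admin/AENQ interrupt plus one
 * per TX/RX queue pair, so e.g. 8 IO queues request 9 vectors.
 */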
if (rc != 0) { device_printf(dev, "Failed to enable MSIX, vectors %d rc %d\n", msix_vecs, rc); ENA_MEM_FREE(adapter->ena_dev->dmadev, adapter->msix_entries); adapter->msix_entries = NULL; rc = ENOSPC; goto error; } adapter->msix_vecs = msix_vecs; adapter->msix_enabled = true; error: return (rc); } static void ena_setup_mgmnt_intr(struct ena_adapter *adapter) { snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s", device_get_nameunit(adapter->pdev)); /* * Handler is NULL on purpose, it will be set * when mgmnt interrupt is acquired */ adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = NULL; adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter; adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector = adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector; return; } static void ena_setup_io_intr(struct ena_adapter *adapter) { static int last_bind_cpu = -1; int irq_idx; ena_trace(ENA_DBG, "enter"); for (int i = 0; i < adapter->num_queues; i++) { irq_idx = ENA_IO_IRQ_IDX(i); snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE, "%s-TxRx-%d", device_get_nameunit(adapter->pdev), i); adapter->irq_tbl[irq_idx].handler = ena_handle_msix; adapter->irq_tbl[irq_idx].data = &adapter->que[i]; adapter->irq_tbl[irq_idx].vector = adapter->msix_entries[irq_idx].vector; ena_trace(ENA_INFO | ENA_IOQ, "ena_setup_io_intr vector: %d\n", adapter->msix_entries[irq_idx].vector); #ifdef RSS adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu = rss_getcpu(i % rss_getnumbuckets()); #else /* * We still want to bind rings to the corresponding cpu * using something similar to the RSS round-robin technique. */ if (last_bind_cpu < 0) last_bind_cpu = CPU_FIRST(); adapter->que[i].cpu = adapter->irq_tbl[irq_idx].cpu = last_bind_cpu; last_bind_cpu = CPU_NEXT(last_bind_cpu); #endif } return; } static int ena_request_mgmnt_irq(struct ena_adapter *adapter) { struct ena_irq *irq; unsigned long flags; int rc, rcc; flags = RF_ACTIVE | RF_SHAREABLE; irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ, &irq->vector, flags); if (irq->res == NULL) { device_printf(adapter->pdev, "could not allocate " "irq vector: %d\n", irq->vector); rc = ENXIO; goto exit_res; } if ((rc = bus_activate_resource(adapter->pdev, SYS_RES_IRQ, irq->vector, irq->res)) != 0) { device_printf(adapter->pdev, "could not activate " "irq vector: %d\n", irq->vector); goto exit_intr; } if ((rc = bus_setup_intr(adapter->pdev, irq->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ena_intr_msix_mgmnt, irq->data, &irq->cookie)) != 0) { device_printf(adapter->pdev, "failed to register " "interrupt handler for irq %ju: %d\n", rman_get_start(irq->res), rc); goto exit_intr; } irq->requested = true; return (rc); exit_intr: device_printf(adapter->pdev, "exit_intr: releasing resource" " for irq %d\n", irq->vector); rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, irq->vector, irq->res); if (rcc) device_printf(adapter->pdev, "dev has no parent while " "releasing res for irq: %d\n", irq->vector); irq->res = NULL; exit_res: return (rc); } static int ena_request_io_irq(struct ena_adapter *adapter) { struct ena_irq *irq; unsigned long flags = 0; int rc = 0, i, rcc; if (!adapter->msix_enabled) { device_printf(adapter->pdev, "failed to request irq\n"); return (EINVAL); } else { flags = RF_ACTIVE | RF_SHAREABLE; } for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { irq = &adapter->irq_tbl[i]; if (irq->requested) continue; irq->res = bus_alloc_resource_any(adapter->pdev, SYS_RES_IRQ, &irq->vector, flags); if (irq->res 
== NULL) { device_printf(adapter->pdev, "could not allocate " "irq vector: %d\n", irq->vector); goto err; } if ((rc = bus_setup_intr(adapter->pdev, irq->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, irq->handler, irq->data, &irq->cookie)) != 0) { device_printf(adapter->pdev, "failed to register " "interrupt handler for irq %ju: %d\n", rman_get_start(irq->res), rc); goto err; } irq->requested = true; #ifdef RSS device_printf(adapter->pdev, "queue %d - RSS bucket %d\n", i - ENA_IO_IRQ_FIRST_IDX, irq->cpu); #else device_printf(adapter->pdev, "queue %d - cpu %d\n", i - ENA_IO_IRQ_FIRST_IDX, irq->cpu); #endif } return (rc); err: for (; i >= ENA_IO_IRQ_FIRST_IDX; i--) { irq = &adapter->irq_tbl[i]; rcc = 0; /* Once we have entered the err: section and irq->requested is true, we free both the intr and the resources */ if (irq->requested == true) rcc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie); if (rcc) device_printf(adapter->pdev, "could not release" " irq: %d, error: %d\n", irq->vector, rcc); /* If we entered the err: section without irq->requested set, we know it was bus_alloc_resource_any() that needs cleanup, provided res is not NULL. In case res is NULL, no work is needed in this iteration */ rcc = 0; if (irq->res != NULL) { rcc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, irq->vector, irq->res); } if (rcc) device_printf(adapter->pdev, "dev has no parent while " "releasing res for irq: %d\n", irq->vector); irq->requested = false; irq->res = NULL; } return (rc); } static void ena_free_mgmnt_irq(struct ena_adapter *adapter) { struct ena_irq *irq; int rc; irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; if (irq->requested) { ena_trace(ENA_INFO | ENA_ADMQ, "tear down irq: %d\n", irq->vector); rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie); if (rc) device_printf(adapter->pdev, "failed to tear " "down irq: %d\n", irq->vector); irq->requested = 0; } if (irq->res != NULL) { ena_trace(ENA_INFO | ENA_ADMQ, "release resource irq: %d\n", irq->vector); rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, irq->vector, irq->res); irq->res = NULL; if (rc) device_printf(adapter->pdev, "dev has no parent while " "releasing res for irq: %d\n", irq->vector); } return; } static void ena_free_io_irq(struct ena_adapter *adapter) { struct ena_irq *irq; int rc; for (int i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { irq = &adapter->irq_tbl[i]; if (irq->requested) { ena_trace(ENA_INFO | ENA_IOQ, "tear down irq: %d\n", irq->vector); rc = bus_teardown_intr(adapter->pdev, irq->res, irq->cookie); if (rc) { device_printf(adapter->pdev, "failed to tear " "down irq: %d\n", irq->vector); } irq->requested = 0; } if (irq->res != NULL) { ena_trace(ENA_INFO | ENA_IOQ, "release resource irq: %d\n", irq->vector); rc = bus_release_resource(adapter->pdev, SYS_RES_IRQ, irq->vector, irq->res); irq->res = NULL; if (rc) { device_printf(adapter->pdev, "dev has no parent" " while releasing res for irq: %d\n", irq->vector); } } } return; } static void ena_free_irqs(struct ena_adapter* adapter) { ena_free_io_irq(adapter); ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); } static void ena_disable_msix(struct ena_adapter *adapter) { pci_release_msi(adapter->pdev); adapter->msix_vecs = 0; ENA_MEM_FREE(adapter->ena_dev->dmadev, adapter->msix_entries); adapter->msix_entries = NULL; } static void ena_unmask_all_io_irqs(struct ena_adapter *adapter) { struct ena_com_io_cq* io_cq; struct ena_eth_io_intr_reg intr_reg; uint16_t ena_qid; int i; /* Unmask interrupts for all queues */ for (i = 0; i < adapter->num_queues; i++) { ena_qid =
ENA_IO_TXQ_IDX(i); io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; ena_com_update_intr_reg(&intr_reg, 0, 0, true); ena_com_unmask_intr(io_cq, &intr_reg); } } /* Configure the Rx forwarding */ static int ena_rss_configure(struct ena_adapter *adapter) { struct ena_com_dev *ena_dev = adapter->ena_dev; int rc; /* Set indirect table */ rc = ena_com_indirect_table_set(ena_dev); if (unlikely(rc && rc != EPERM)) return rc; /* Configure hash function (if supported) */ rc = ena_com_set_hash_function(ena_dev); if (unlikely(rc && (rc != EPERM))) return rc; /* Configure hash inputs (if supported) */ rc = ena_com_set_hash_ctrl(ena_dev); if (unlikely(rc && (rc != EPERM))) return rc; return 0; } +static void +ena_update_hw_stats(void *arg, int pending) +{ + struct ena_adapter *adapter = arg; + int rc; + + for (;;) { + if (!adapter->up) + return; + + rc = ena_update_stats_counters(adapter); + if (rc) + ena_trace(ENA_WARNING, + "Error updating stats counters, rc = %d", rc); + + pause("ena update hw stats", hz); + } +} + static int ena_up_complete(struct ena_adapter *adapter) { int rc; if (adapter->rss_support) { rc = ena_rss_configure(adapter); if (rc) return (rc); } ena_change_mtu(adapter->ifp, adapter->ifp->if_mtu); ena_refill_all_rx_bufs(adapter); ena_unmask_all_io_irqs(adapter); return (0); } static int ena_up(struct ena_adapter *adapter) { int rc = 0; if (!device_is_attached(adapter->pdev)) { device_printf(adapter->pdev, "device is not attached!\n"); return (ENXIO); } if (!adapter->running) { device_printf(adapter->pdev, "device is not running!\n"); return (ENXIO); } if (!adapter->up) { device_printf(adapter->pdev, "device is going UP\n"); /* setup interrupts for IO queues */ ena_setup_io_intr(adapter); rc = ena_request_io_irq(adapter); if (rc) { ena_trace(ENA_ALERT, "err_req_irq"); goto err_req_irq; } /* allocate transmit descriptors */ rc = ena_setup_all_tx_resources(adapter); if (rc) { ena_trace(ENA_ALERT, "err_setup_tx"); goto err_setup_tx; } /* allocate receive descriptors */ rc = ena_setup_all_rx_resources(adapter); if (rc) { ena_trace(ENA_ALERT, "err_setup_rx"); goto err_setup_rx; } /* create IO queues for Rx & Tx */ rc = ena_create_io_queues(adapter); if (rc) { ena_trace(ENA_ALERT, "create IO queues failed"); goto err_io_que; } if (adapter->link_status) if_link_state_change(adapter->ifp, LINK_STATE_UP); rc = ena_up_complete(adapter); if (rc) goto err_up_complete; counter_u64_add(adapter->dev_stats.interface_up, 1); ena_update_hwassist(adapter); if_setdrvflagbits(adapter->ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S, ena_timer_service, (void *)adapter, 0); + taskqueue_enqueue(adapter->stats_tq, &adapter->stats_task); + adapter->up = true; } return (0); err_up_complete: ena_destroy_all_io_queues(adapter); err_io_que: ena_free_all_rx_resources(adapter); err_setup_rx: ena_free_all_tx_resources(adapter); err_setup_tx: ena_free_io_irq(adapter); err_req_irq: return (rc); } int ena_update_stats_counters(struct ena_adapter *adapter) { struct ena_admin_basic_stats ena_stats; struct ena_hw_stats *stats = &adapter->hw_stats; int rc = 0; if (!adapter->up) return (rc); rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats); if (rc) return (rc); stats->tx_bytes = ((uint64_t)ena_stats.tx_bytes_high << 32) | ena_stats.tx_bytes_low; stats->rx_bytes = ((uint64_t)ena_stats.rx_bytes_high << 32) | ena_stats.rx_bytes_low; stats->rx_packets = ((uint64_t)ena_stats.rx_pkts_high << 32) | ena_stats.rx_pkts_low; stats->tx_packets = 
((uint64_t)ena_stats.tx_pkts_high << 32) | ena_stats.tx_pkts_low; stats->rx_drops = ((uint64_t)ena_stats.rx_drops_high << 32) | ena_stats.rx_drops_low; return (0); } static uint64_t ena_get_counter(if_t ifp, ift_counter cnt) { struct ena_adapter *adapter; struct ena_hw_stats *stats; - int rc; adapter = if_getsoftc(ifp); - - /* - * Update only when asking for first counter and interface is up. - * Usually asks for all statistics in sequence. - */ - if (adapter->up) { - if (cnt == 0) { - rc = ena_update_stats_counters(adapter); - if (rc) { - ena_trace(ENA_WARNING, - "Error updating stats counters, rc = %d", - rc); - } - } - } stats = &adapter->hw_stats; switch (cnt) { case IFCOUNTER_IPACKETS: return (stats->rx_packets); case IFCOUNTER_OPACKETS: return (stats->tx_packets); case IFCOUNTER_IBYTES: return (stats->rx_bytes); case IFCOUNTER_OBYTES: return (stats->tx_bytes); case IFCOUNTER_IQDROPS: return (stats->rx_drops); default: return (if_get_counter_default(ifp, cnt)); } } static int ena_media_change(if_t ifp) { /* Media Change is not supported by firmware */ return (0); } static void ena_media_status(if_t ifp, struct ifmediareq *ifmr) { struct ena_adapter *adapter = if_getsoftc(ifp); ena_trace(ENA_DBG, "enter"); ENA_DEV_LOCK; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_status) { ENA_DEV_UNLOCK; ena_trace(ENA_WARNING, "link_status = false"); return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; ENA_DEV_UNLOCK; return; } static void ena_init(void *arg) { struct ena_adapter *adapter = (struct ena_adapter *)arg; if (adapter->up == false) ena_up(adapter); return; } static int ena_ioctl(if_t ifp, u_long command, caddr_t data) { struct ena_adapter *adapter; struct ifreq *ifr; int rc; adapter = ifp->if_softc; ifr = (struct ifreq *)data; /* * Acquiring the lock to prevent the up and down routines from running in parallel.
*/ - sx_xlock(&adapter->ioctl_sx); - rc = 0; switch (command) { case SIOCSIFMTU: + sx_xlock(&adapter->ioctl_sx); ena_down(adapter); ena_change_mtu(ifp, ifr->ifr_mtu); rc = ena_up(adapter); + sx_unlock(&adapter->ioctl_sx); break; case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if (ifp->if_flags & (IFF_PROMISC | IFF_ALLMULTI)) { device_printf(adapter->pdev, "ioctl promisc/allmulti\n"); } } else { + sx_xlock(&adapter->ioctl_sx); rc = ena_up(adapter); + sx_unlock(&adapter->ioctl_sx); } } else { - if (ifp->if_drv_flags & IFF_DRV_RUNNING) + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + sx_xlock(&adapter->ioctl_sx); ena_down(adapter); + sx_unlock(&adapter->ioctl_sx); + } } break; case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: rc = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { int reinit = 0; if (ifr->ifr_reqcap != ifp->if_capenable) { ifp->if_capenable = ifr->ifr_reqcap; reinit = 1; } if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { + sx_xlock(&adapter->ioctl_sx); ena_down(adapter); rc = ena_up(adapter); + sx_unlock(&adapter->ioctl_sx); } } break; default: rc = ether_ioctl(ifp, command, data); break; } - sx_unlock(&adapter->ioctl_sx); - return (rc); } static int ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *feat) { int caps = 0; if (feat->offload.tx & (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK | ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK | ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK)) caps |= IFCAP_TXCSUM; if (feat->offload.tx & (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK | ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)) caps |= IFCAP_TXCSUM_IPV6; if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) caps |= IFCAP_TSO4; if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) caps |= IFCAP_TSO6; if (feat->offload.rx_supported & (ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK | ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK)) caps |= IFCAP_RXCSUM; if (feat->offload.rx_supported & ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) caps |= IFCAP_RXCSUM_IPV6; caps |= IFCAP_LRO | IFCAP_JUMBO_MTU; return (caps); } static void ena_update_host_info(struct ena_admin_host_info *host_info, if_t ifp) { host_info->supported_network_features[0] = (uint32_t)if_getcapabilities(ifp); } static void ena_update_hwassist(struct ena_adapter *adapter) { if_t ifp = adapter->ifp; uint32_t feat = adapter->tx_offload_cap; int cap = if_getcapenable(ifp); int flags = 0; if_clearhwassist(ifp); if (cap & IFCAP_TXCSUM) { if (feat & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK) flags |= CSUM_IP; if (feat & (ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK | ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)) flags |= CSUM_IP_UDP | CSUM_IP_TCP; } if (cap & IFCAP_TXCSUM_IPV6) flags |= CSUM_IP6_UDP | CSUM_IP6_TCP; if (cap & IFCAP_TSO4) flags |= CSUM_IP_TSO; if (cap & IFCAP_TSO6) flags |= CSUM_IP6_TSO; if_sethwassistbits(ifp, flags, 0); } static int ena_setup_ifnet(device_t pdev, struct ena_adapter *adapter, struct ena_com_dev_get_features_ctx *feat) { if_t ifp; int caps = 0; ena_trace(ENA_DBG, "enter"); ifp = adapter->ifp = if_gethandle(IFT_ETHER); if (ifp == 0) { device_printf(pdev, "can not allocate ifnet structure\n"); return (ENXIO); } if_initname(ifp, device_get_name(pdev), device_get_unit(pdev)); if_setdev(ifp, pdev); if_setsoftc(ifp, adapter); if_setflags(ifp, IFF_BROADCAST | 
IFF_SIMPLEX | IFF_MULTICAST); if_setinitfn(ifp, ena_init); if_settransmitfn(ifp, ena_mq_start); if_setqflushfn(ifp, ena_qflush); if_setioctlfn(ifp, ena_ioctl); if_setgetcounterfn(ifp, ena_get_counter); if_setsendqlen(ifp, adapter->tx_ring_size); if_setsendqready(ifp); if_setmtu(ifp, ETHERMTU); if_setbaudrate(ifp, 0); /* Zeroize capabilities... */ if_setcapabilities(ifp, 0); if_setcapenable(ifp, 0); /* check hardware support */ caps = ena_get_dev_offloads(feat); /* ... and set them */ if_setcapabilitiesbit(ifp, caps, 0); /* TSO parameters */ ifp->if_hw_tsomax = ENA_TSO_MAXSIZE; ifp->if_hw_tsomaxsegcount = ENA_TSO_NSEGS; ifp->if_hw_tsomaxsegsize = MCLBYTES; if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); if_setcapenable(ifp, if_getcapabilities(ifp)); /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, ena_media_change, ena_media_status); ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); ether_ifattach(ifp, adapter->mac_addr); return (0); } static void ena_down(struct ena_adapter *adapter) { if (adapter->up) { device_printf(adapter->pdev, "device is going DOWN\n"); callout_drain(&adapter->timer_service); adapter->up = false; if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); + /* Drain task responsible for updating hw stats */ + while (taskqueue_cancel(adapter->stats_tq, &adapter->stats_task, NULL)) + taskqueue_drain(adapter->stats_tq, &adapter->stats_task); + ena_free_io_irq(adapter); ena_destroy_all_io_queues(adapter); ena_free_all_tx_bufs(adapter); ena_free_all_rx_bufs(adapter); ena_free_all_tx_resources(adapter); ena_free_all_rx_resources(adapter); counter_u64_add(adapter->dev_stats.interface_down, 1); } return; } static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf) { struct ena_com_tx_meta *ena_meta; struct ether_vlan_header *eh; u32 mss; bool offload; uint16_t etype; int ehdrlen; struct ip *ip; int iphlen; struct tcphdr *th; offload = false; ena_meta = &ena_tx_ctx->ena_meta; mss = mbuf->m_pkthdr.tso_segsz; if (mss != 0) offload = true; if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) offload = true; if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0) offload = true; if (offload == false) { ena_tx_ctx->meta_valid = 0; return; } /* Determine where frame payload starts. 
*/ eh = mtod(mbuf, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } ip = (struct ip *)(mbuf->m_data + ehdrlen); iphlen = ip->ip_hl << 2; th = (struct tcphdr *)((caddr_t)ip + iphlen); if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) { ena_tx_ctx->l3_csum_enable = 1; } if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) { ena_tx_ctx->tso_enable = 1; ena_meta->l4_hdr_len = (th->th_off); } switch (etype) { case ETHERTYPE_IP: ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; if (ip->ip_off == 0) ena_tx_ctx->df = 1; break; case ETHERTYPE_IPV6: ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; default: break; } if (ip->ip_p == IPPROTO_TCP) { ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; if (mbuf->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) ena_tx_ctx->l4_csum_enable = 1; else ena_tx_ctx->l4_csum_enable = 0; } else if (ip->ip_p == IPPROTO_UDP) { ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; if (mbuf->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) ena_tx_ctx->l4_csum_enable = 1; else ena_tx_ctx->l4_csum_enable = 0; } else { ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UNKNOWN; ena_tx_ctx->l4_csum_enable = 0; } ena_meta->mss = mss; ena_meta->l3_hdr_len = iphlen; ena_meta->l3_hdr_offset = ehdrlen; ena_tx_ctx->meta_valid = 1; } static int -ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf *mbuf) +ena_check_and_defragment_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) { struct ena_adapter *adapter; + struct mbuf *defrag_mbuf; + int num_frags; + + adapter = tx_ring->adapter; + num_frags = ena_mbuf_count(*mbuf); + + /* One segment must be reserved for the configuration descriptor. */ + if (num_frags < adapter->max_tx_sgl_size) + return (0); + counter_u64_add(tx_ring->tx_stats.defragment, 1); + + defrag_mbuf = m_defrag(*mbuf, M_NOWAIT); + if (defrag_mbuf == NULL) { + counter_u64_add(tx_ring->tx_stats.defragment_err, 1); + return (ENOMEM); + } + + /* If the mbuf was defragmented successfully, the original mbuf is released. */ + *mbuf = defrag_mbuf; + + return (0); +} + +static int +ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) +{ + struct ena_adapter *adapter; struct ena_tx_buffer *tx_info; struct ena_com_tx_ctx ena_tx_ctx; struct ena_com_dev *ena_dev; struct ena_com_buf *ena_buf; struct ena_com_io_sq* io_sq; bus_dma_segment_t segs[ENA_BUS_DMA_SEGS]; void *push_hdr; uint16_t next_to_use; uint16_t req_id; uint16_t push_len; uint16_t ena_qid; uint32_t len, nsegs, header_len; int i, rc; - int nb_hw_desc, num_frags; + int nb_hw_desc; ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); adapter = tx_ring->que->adapter; ena_dev = adapter->ena_dev; io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; - ENA_ASSERT(mbuf, "mbuf is NULL\n"); + ENA_ASSERT(*mbuf, "mbuf is NULL\n"); - num_frags = ena_mbuf_count(mbuf); - if (num_frags > (adapter->max_tx_sgl_size - 2)) { - device_printf(adapter->pdev, - "too many fragments. Last fragment: %d!\n", num_frags); - return (ENA_COM_INVAL); + rc = ena_check_and_defragment_mbuf(tx_ring, mbuf); + if (rc) { + ena_trace(ENA_WARNING, + "Failed to defragment mbuf!
err: %d", rc); + return (rc); } next_to_use = tx_ring->next_to_use; req_id = tx_ring->free_tx_ids[next_to_use]; tx_info = &tx_ring->tx_buffer_info[req_id]; - tx_info->mbuf = mbuf; + tx_info->mbuf = *mbuf; tx_info->num_of_bufs = 0; ena_buf = tx_info->bufs; - len = mbuf->m_len; + len = (*mbuf)->m_len; - ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes", mbuf->m_pkthdr.len); + ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes", (*mbuf)->m_pkthdr.len); push_len = 0; header_len = min_t(uint32_t, len, tx_ring->tx_max_header_size); push_hdr = NULL; rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->map, - mbuf, segs, &nsegs, BUS_DMA_NOWAIT); + *mbuf, segs, &nsegs, BUS_DMA_NOWAIT); if (rc || (nsegs == 0)) { ena_trace(ENA_WARNING, "dmamap load failed! err: %d nsegs: %d", rc, nsegs); counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1); tx_info->mbuf = NULL; if (rc == ENOMEM) return (ENA_COM_NO_MEM); else return (ENA_COM_INVAL); } for (i = 0; i < nsegs; i++) { ena_buf->len = segs[i].ds_len; ena_buf->paddr = segs[i].ds_addr; ena_buf++; } tx_info->num_of_bufs = nsegs; memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); ena_tx_ctx.ena_bufs = tx_info->bufs; ena_tx_ctx.push_header = push_hdr; ena_tx_ctx.num_bufs = tx_info->num_of_bufs; ena_tx_ctx.req_id = req_id; ena_tx_ctx.header_len = header_len; /* Set flags and meta data */ - ena_tx_csum(&ena_tx_ctx, mbuf); + ena_tx_csum(&ena_tx_ctx, *mbuf); /* Prepare the packet's descriptors and send them to device */ rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc); if (rc != 0) { ena_trace(ENA_WARNING, "failed to prepare tx bufs\n"); counter_enter(); counter_u64_add_protected(tx_ring->tx_stats.queue_stop, 1); counter_u64_add_protected(tx_ring->tx_stats.prepare_ctx_err, 1); counter_exit(); goto dma_error; } counter_enter(); counter_u64_add_protected(tx_ring->tx_stats.cnt, 1); - counter_u64_add_protected(tx_ring->tx_stats.bytes, mbuf->m_pkthdr.len); + counter_u64_add_protected(tx_ring->tx_stats.bytes, (*mbuf)->m_pkthdr.len); counter_exit(); tx_info->tx_descs = nb_hw_desc; getbinuptime(&tx_info->timestamp); tx_info->print_once = true; tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, tx_ring->ring_size); bus_dmamap_sync(adapter->tx_buf_tag, tx_info->map, BUS_DMASYNC_PREWRITE); return (0); dma_error: tx_info->mbuf = NULL; bus_dmamap_unload(adapter->tx_buf_tag, tx_info->map); return (rc); } static void ena_start_xmit(struct ena_ring *tx_ring) { struct mbuf *mbuf; struct ena_adapter *adapter = tx_ring->adapter; struct ena_com_io_sq* io_sq; int ena_qid; int acum_pkts = 0; int ret = 0; if ((adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; if (!adapter->link_status) return; ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) { ena_trace(ENA_DBG | ENA_TXPTH, "\ndequeued mbuf %p with flags %#x and" " header csum flags %#jx", mbuf, mbuf->m_flags, mbuf->m_pkthdr.csum_flags); if (ena_com_sq_empty_space(io_sq) < ENA_TX_CLEANUP_TRESHOLD) ena_tx_cleanup(tx_ring); - if ((ret = ena_xmit_mbuf(tx_ring, mbuf)) != 0) { + if ((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0) { if (ret == ENA_COM_NO_MEM) { drbr_putback(adapter->ifp, tx_ring->br, mbuf); } else if (ret == ENA_COM_NO_SPACE) { drbr_putback(adapter->ifp, tx_ring->br, mbuf); } else { m_freem(mbuf); drbr_advance(adapter->ifp, tx_ring->br); } break; } if ((adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; drbr_advance(adapter->ifp, tx_ring->br); acum_pkts++; BPF_MTAP(adapter->ifp, mbuf); if 
(acum_pkts == DB_THRESHOLD) { acum_pkts = 0; wmb(); /* Trigger the dma engine */ ena_com_write_sq_doorbell(io_sq); counter_u64_add(tx_ring->tx_stats.doorbells, 1); } } if (acum_pkts) { wmb(); /* Trigger the dma engine */ ena_com_write_sq_doorbell(io_sq); counter_u64_add(tx_ring->tx_stats.doorbells, 1); } if (ena_com_sq_empty_space(io_sq) < ENA_TX_CLEANUP_TRESHOLD) ena_tx_cleanup(tx_ring); } static void ena_deferred_mq_start(void *arg, int pending) { struct ena_ring *tx_ring = (struct ena_ring *)arg; struct ifnet *ifp = tx_ring->adapter->ifp; while (drbr_empty(ifp, tx_ring->br) == FALSE && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { ENA_RING_MTX_LOCK(tx_ring); ena_start_xmit(tx_ring); ENA_RING_MTX_UNLOCK(tx_ring); } } static int ena_mq_start(if_t ifp, struct mbuf *m) { struct ena_adapter *adapter = ifp->if_softc; struct ena_ring *tx_ring; int ret, is_drbr_empty; uint32_t i; if ((adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return (ENODEV); /* Which queue to use */ /* * If everything is set up correctly, it should be the * same bucket as the one the current CPU maps to, * which should improve performance. */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { #ifdef RSS if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m), &i) == 0) { i = i % adapter->num_queues; } else #endif { i = m->m_pkthdr.flowid % adapter->num_queues; } } else { i = curcpu % adapter->num_queues; } tx_ring = &adapter->tx_ring[i]; /* Check if drbr is empty before putting packet */ is_drbr_empty = drbr_empty(ifp, tx_ring->br); ret = drbr_enqueue(ifp, tx_ring->br, m); if (ret) { taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); return (ret); } if (is_drbr_empty && ENA_RING_MTX_TRYLOCK(tx_ring)) { ena_start_xmit(tx_ring); ENA_RING_MTX_UNLOCK(tx_ring); } else { taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task); } return (0); } static void ena_qflush(if_t ifp) { struct ena_adapter *adapter = ifp->if_softc; struct ena_ring *tx_ring = adapter->tx_ring; int i; for(i = 0; i < adapter->num_queues; ++i, ++tx_ring) if (drbr_empty(ifp, tx_ring->br) == FALSE) { ENA_RING_MTX_LOCK(tx_ring); drbr_flush(ifp, tx_ring->br); ENA_RING_MTX_UNLOCK(tx_ring); } if_qflush(ifp); return; } static int ena_calc_io_queue_num(struct ena_adapter *adapter, struct ena_com_dev_get_features_ctx *get_feat_ctx) { int io_sq_num, io_cq_num, io_queue_num; io_sq_num = get_feat_ctx->max_queues.max_sq_num; io_cq_num = get_feat_ctx->max_queues.max_sq_num; io_queue_num = min_t(int, mp_ncpus, ENA_MAX_NUM_IO_QUEUES); io_queue_num = min_t(int, io_queue_num, io_sq_num); io_queue_num = min_t(int, io_queue_num, io_cq_num); /* 1 IRQ for mgmnt and 1 IRQ for each TX/RX pair */ io_queue_num = min_t(int, io_queue_num, pci_msix_count(adapter->pdev) - 1); #ifdef RSS io_queue_num = min_t(int, io_queue_num, rss_getnumbuckets()); #endif return io_queue_num; } static int ena_calc_queue_size(struct ena_adapter *adapter, uint16_t *max_tx_sgl_size, uint16_t *max_rx_sgl_size, struct ena_com_dev_get_features_ctx *feat) { uint32_t queue_size = ENA_DEFAULT_RING_SIZE; uint32_t v; uint32_t q; queue_size = min_t(uint32_t, queue_size, feat->max_queues.max_cq_depth); queue_size = min_t(uint32_t, queue_size, feat->max_queues.max_sq_depth); /* round down to the nearest power of 2 */ v = queue_size; while (v != 0) { if (powerof2(queue_size)) break; v /= 2; q = rounddown2(queue_size, v); if (q != 0) { queue_size = q; break; } } if (unlikely(!queue_size)) { device_printf(adapter->pdev, "Invalid queue size\n"); return ENA_COM_FAULT; } *max_tx_sgl_size = min_t(uint16_t,
ENA_PKT_MAX_BUFS, feat->max_queues.max_packet_tx_descs); *max_rx_sgl_size = min_t(uint16_t, ENA_PKT_MAX_BUFS, feat->max_queues.max_packet_rx_descs); return queue_size; } static int ena_rss_init_default(struct ena_adapter *adapter) { struct ena_com_dev *ena_dev = adapter->ena_dev; device_t dev = adapter->pdev; int qid, rc, i; rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); if (unlikely(rc)) { device_printf(dev, "Cannot init RSS\n"); goto err_rss_init; } for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { #ifdef RSS qid = rss_get_indirection_to_bucket(i); qid = qid % adapter->num_queues; #else qid = i % adapter->num_queues; #endif rc = ena_com_indirect_table_fill_entry(ena_dev, i, ENA_IO_RXQ_IDX(qid)); if (unlikely(rc && (rc != EPERM))) { device_printf(dev, "Cannot fill indirect table\n"); goto err_fill_indir; } } rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL, ENA_HASH_KEY_SIZE, 0xFFFFFFFF); if (unlikely(rc && (rc != EPERM))) { device_printf(dev, "Cannot fill hash function\n"); goto err_fill_indir; } rc = ena_com_set_default_hash_ctrl(ena_dev); if (unlikely(rc && (rc != EPERM))) { device_printf(dev, "Cannot fill hash control\n"); goto err_fill_indir; } return (0); err_fill_indir: ena_com_rss_destroy(ena_dev); err_rss_init: return (rc); } static void ena_rss_init_default_deferred(void *arg) { struct ena_adapter *adapter; devclass_t dc; int max; int rc; dc = devclass_find("ena"); if (dc == NULL) { ena_trace(ENA_DBG, "No devclass ena\n"); return; } max = devclass_get_maxunit(dc); while (max-- >= 0) { adapter = devclass_get_softc(dc, max); if (adapter != NULL) { rc = ena_rss_init_default(adapter); adapter->rss_support = true; if (rc) { device_printf(adapter->pdev, "WARNING: RSS was not properly initialized," " it will affect bandwidth\n"); adapter->rss_support = false; } } } } SYSINIT(ena_rss_init, SI_SUB_KICK_SCHEDULER, SI_ORDER_SECOND, ena_rss_init_default_deferred, NULL); static void ena_config_host_info(struct ena_com_dev *ena_dev) { struct ena_admin_host_info *host_info; int rc; /* Allocate only the host info */ rc = ena_com_allocate_host_info(ena_dev); if (rc) { ena_trace(ENA_ALERT, "Cannot allocate host info\n"); return; } host_info = ena_dev->host_attr.host_info; host_info->os_type = ENA_ADMIN_OS_FREEBSD; host_info->kernel_ver = osreldate; sprintf(host_info->kernel_ver_str, "%d", osreldate); host_info->os_dist = 0; strncpy(host_info->os_dist_str, osrelease, sizeof(host_info->os_dist_str) - 1); host_info->driver_version = (DRV_MODULE_VER_MAJOR) | (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); rc = ena_com_set_host_attributes(ena_dev); if (rc) { if (rc == EPERM) ena_trace(ENA_WARNING, "Cannot set host attributes\n"); else ena_trace(ENA_ALERT, "Cannot set host attributes\n"); goto err; } return; err: ena_com_delete_host_info(ena_dev); } static int ena_device_init(struct ena_adapter *adapter, device_t pdev, struct ena_com_dev_get_features_ctx *get_feat_ctx, int *wd_active) { struct ena_com_dev* ena_dev = adapter->ena_dev; bool readless_supported; uint32_t aenq_groups; int dma_width; int rc; rc = ena_com_mmio_reg_read_request_init(ena_dev); if (rc) { device_printf(pdev, "failed to init mmio read less\n"); return rc; } /* * The PCIe configuration space revision id indicates whether mmio reg * read is disabled */ readless_supported = !(pci_get_revid(pdev) & ENA_MMIO_DISABLE_REG_READ); ena_com_set_mmio_read_mode(ena_dev, readless_supported); rc = ena_com_dev_reset(ena_dev); if (rc) { device_printf(pdev,
"Can not reset device\n"); goto err_mmio_read_less; } rc = ena_com_validate_version(ena_dev); if (rc) { device_printf(pdev, "device version is too low\n"); goto err_mmio_read_less; } dma_width = ena_com_get_dma_width(ena_dev); if (dma_width < 0) { device_printf(pdev, "Invalid dma width value %d", dma_width); rc = dma_width; goto err_mmio_read_less; } adapter->dma_width = dma_width; /* ENA admin level init */ rc = ena_com_admin_init(ena_dev, &aenq_handlers, true); if (rc) { device_printf(pdev, "Can not initialize ena admin queue with device\n"); goto err_mmio_read_less; } /* * To enable the msix interrupts the driver needs to know the number * of queues. So the driver uses polling mode to retrieve this * information */ ena_com_set_admin_polling_mode(ena_dev, true); ena_config_host_info(ena_dev); /* Get Device Attributes */ rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); if (rc) { device_printf(pdev, "Cannot get attribute for ena device rc: %d\n", rc); goto err_admin_init; } aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | BIT(ENA_ADMIN_FATAL_ERROR) | BIT(ENA_ADMIN_WARNING) | BIT(ENA_ADMIN_NOTIFICATION) | BIT(ENA_ADMIN_KEEP_ALIVE); aenq_groups &= get_feat_ctx->aenq.supported_groups; rc = ena_com_set_aenq_config(ena_dev, aenq_groups); if (rc) { device_printf(pdev, "Cannot configure aenq groups rc: %d\n", rc); goto err_admin_init; } *wd_active = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); return 0; err_admin_init: ena_com_delete_host_info(ena_dev); ena_com_admin_destroy(ena_dev); err_mmio_read_less: ena_com_mmio_reg_read_request_destroy(ena_dev); return rc; } static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter, int io_vectors) { struct ena_com_dev *ena_dev = adapter->ena_dev; int rc; rc = ena_enable_msix(adapter); if (rc) { device_printf(adapter->pdev, "Error with MSI-X enablement\n"); return rc; } ena_setup_mgmnt_intr(adapter); rc = ena_request_mgmnt_irq(adapter); if (rc) { device_printf(adapter->pdev, "Cannot setup mgmnt queue intr\n"); goto err_disable_msix; } ena_com_set_admin_polling_mode(ena_dev, false); ena_com_admin_aenq_enable(ena_dev); return 0; err_disable_msix: ena_disable_msix(adapter); return rc; } /* Function called on ENA_ADMIN_KEEP_ALIVE event */ static void ena_keep_alive_wd(void *adapter_data, struct ena_admin_aenq_entry *aenq_e) { struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; sbintime_t stime; stime = getsbinuptime(); atomic_store_rel_64(&adapter->keep_alive_timestamp, stime); } /* Check for keep alive expiration */ static void check_for_missing_keep_alive(struct ena_adapter *adapter) { sbintime_t timestamp, time; if (adapter->wd_active == 0) return; if (adapter->keep_alive_timeout == 0) return; timestamp = atomic_load_acq_64(&adapter->keep_alive_timestamp); time = getsbinuptime() - timestamp; if (unlikely(time > adapter->keep_alive_timeout)) { device_printf(adapter->pdev, "Keep alive watchdog timeout.\n"); counter_u64_add(adapter->dev_stats.wd_expired, 1); adapter->trigger_reset = true; } } /* Check if admin queue is enabled */ static void check_for_admin_com_state(struct ena_adapter *adapter) { if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) { device_printf(adapter->pdev, "ENA admin queue is not in running state!\n"); counter_u64_add(adapter->dev_stats.admin_q_pause, 1); adapter->trigger_reset = true; } } /* * Check for TX which were not completed on time. * Timeout is defined by "missing_tx_timeout". * Reset will be performed if number of incompleted * transactions exceeds "missing_tx_threshold". 
*/ static void check_for_missing_tx_completions(struct ena_adapter *adapter) { struct ena_ring *tx_ring; struct ena_tx_buffer *tx_info; struct bintime curtime, time; int i, j, budget, missed_tx; /* Make sure the device isn't being brought up or down by another process in the meantime */ rmb(); if (!adapter->up) return; if (adapter->trigger_reset) return; if (adapter->missing_tx_timeout == 0) return; budget = adapter->missing_tx_max_queues; getbinuptime(&curtime); for (i = adapter->next_monitored_tx_qid; i < adapter->num_queues; i++) { tx_ring = &adapter->tx_ring[i]; missed_tx = 0; for (j = 0; j < tx_ring->ring_size; j++) { tx_info = &tx_ring->tx_buffer_info[j]; if (!bintime_isset(&tx_info->timestamp)) continue; time = curtime; bintime_sub(&time, &tx_info->timestamp); /* Check again if packet is still waiting */ if (bintime_isset(&tx_info->timestamp) && unlikely( bttosbt(time) > adapter->missing_tx_timeout)) { if (tx_info->print_once) device_printf(adapter->pdev, "Found a Tx that wasn't completed " "on time, qid %d, index %d.\n", tx_ring->qid, j); tx_info->print_once = false; missed_tx++; if (unlikely(missed_tx > adapter->missing_tx_threshold)) { device_printf(adapter->pdev, "The number of lost tx completions " "is above the threshold (%d > %d). " "Reset the device\n", missed_tx, adapter->missing_tx_threshold); adapter->trigger_reset = true; return; } } } budget--; if (!budget) { i++; break; } } adapter->next_monitored_tx_qid = i % adapter->num_queues; } static void ena_timer_service(void *data) { struct ena_adapter *adapter = (struct ena_adapter *)data; struct ena_admin_host_info *host_info = adapter->ena_dev->host_attr.host_info; check_for_missing_keep_alive(adapter); check_for_admin_com_state(adapter); check_for_missing_tx_completions(adapter); if (host_info) ena_update_host_info(host_info, adapter->ifp); if (unlikely(adapter->trigger_reset)) { device_printf(adapter->pdev, "Trigger reset is on\n"); taskqueue_enqueue(adapter->reset_tq, &adapter->reset_task); return; } /* * Schedule another timeout one second from now. */ callout_schedule_sbt(&adapter->timer_service, SBT_1S, SBT_1S, 0); } static void ena_reset_task(void *arg, int pending) { struct ena_com_dev_get_features_ctx get_feat_ctx; struct ena_adapter *adapter = (struct ena_adapter *)arg; struct ena_com_dev *ena_dev = adapter->ena_dev; bool dev_up; int rc; if (unlikely(!adapter->trigger_reset)) { device_printf(adapter->pdev, "device reset scheduled but trigger_reset is off\n"); return; } sx_xlock(&adapter->ioctl_sx); callout_drain(&adapter->timer_service); dev_up = adapter->up; ena_com_set_admin_running_state(ena_dev, false); ena_free_mgmnt_irq(adapter); ena_down(adapter); ena_com_dev_reset(ena_dev); ena_disable_msix(adapter); ena_com_abort_admin_commands(ena_dev); ena_com_wait_for_abort_completion(ena_dev); ena_com_admin_destroy(ena_dev); ena_com_mmio_reg_read_request_destroy(ena_dev); adapter->trigger_reset = false; /* Finished destroy part. Restart the device */ rc = ena_device_init(adapter, adapter->pdev, &get_feat_ctx, &adapter->wd_active); if (rc) { device_printf(adapter->pdev, "ENA device init failed!
(err: %d)\n", rc); goto err_dev_free; } rc = ena_enable_msix_and_set_admin_interrupts(adapter, adapter->num_queues); if (rc) { device_printf(adapter->pdev, "Enable MSI-X failed\n"); goto err_com_free; } /* If the interface was up before the reset bring it up */ if (dev_up) { rc = ena_up(adapter); if (rc) { device_printf(adapter->pdev, "Failed to create I/O queues\n"); goto err_msix_free; } } callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S, ena_timer_service, (void *)adapter, 0); sx_unlock(&adapter->ioctl_sx); return; err_msix_free: ena_com_dev_reset(ena_dev); ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); err_com_free: ena_com_admin_destroy(ena_dev); err_dev_free: device_printf(adapter->pdev, "ENA reset failed!\n"); adapter->running = false; sx_unlock(&adapter->ioctl_sx); } /** * ena_attach - Device Initialization Routine * @pdev: device information struct * * Returns 0 on success, or an error code on failure. * * ena_attach initializes an adapter identified by a device structure. * The OS initialization, configuring of the adapter private structure, * and a hardware reset occur. **/ static int ena_attach(device_t pdev) { struct ena_com_dev_get_features_ctx get_feat_ctx; static int version_printed; struct ena_adapter *adapter; struct ena_com_dev *ena_dev = NULL; uint16_t tx_sgl_size = 0; uint16_t rx_sgl_size = 0; int io_queue_num; int queue_size; int rc; struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; adapter = device_get_softc(pdev); adapter->pdev = pdev; ctx = device_get_sysctl_ctx(pdev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(pdev)); mtx_init(&adapter->global_mtx, "ENA global mtx", NULL, MTX_DEF); sx_init(&adapter->ioctl_sx, "ENA ioctl sx"); /* Sysctl calls for Watchdog service */ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "wd_active", CTLFLAG_RWTUN, &adapter->wd_active, 0, "Watchdog is active"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "keep_alive_timeout", CTLFLAG_RWTUN, &adapter->keep_alive_timeout, "Timeout for Keep Alive messages"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "missing_tx_timeout", CTLFLAG_RWTUN, &adapter->missing_tx_timeout, "Timeout for TX completion"); SYSCTL_ADD_U32(ctx, children, OID_AUTO, "missing_tx_max_queues", CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0, "Number of TX queues to check per run"); SYSCTL_ADD_U32(ctx, children, OID_AUTO, "missing_tx_threshold", CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0, "Max number of timed out packets"); /* Set up the timer service */ callout_init_mtx(&adapter->timer_service, &adapter->global_mtx, 0); adapter->keep_alive_timeout = DEFAULT_KEEP_ALIVE_TO; adapter->missing_tx_timeout = DEFAULT_TX_CMP_TO; adapter->missing_tx_max_queues = DEFAULT_TX_MONITORED_QUEUES; adapter->missing_tx_threshold = DEFAULT_TX_CMP_THRESHOLD; if (version_printed++ == 0) device_printf(pdev, "%s\n", ena_version); rc = ena_allocate_pci_resources(adapter); if (rc) { device_printf(pdev, "PCI resource allocation failed!\n"); ena_free_pci_resources(adapter); goto err_pci_res; } /* Allocate memory for ena_dev structure */ ena_dev = ENA_MEM_ALLOC(pdev, sizeof(struct ena_com_dev)); if (!ena_dev) { device_printf(pdev, "allocating ena_dev failed\n"); rc = ENOMEM; goto err_select_region; } adapter->ena_dev = ena_dev; ena_dev->dmadev = pdev; ena_dev->bus = malloc(sizeof(struct ena_bus), M_DEVBUF, M_WAITOK | M_ZERO); /* Store register resources */ ((struct ena_bus*)(ena_dev->bus))->reg_bar_t = rman_get_bustag(adapter->registers); ((struct ena_bus*)(ena_dev->bus))->reg_bar_h = rman_get_bushandle(adapter->registers); if
(((struct ena_bus*)(ena_dev->bus))->reg_bar_h == 0) { device_printf(pdev, "failed to pmap registers bar\n"); rc = ENXIO; goto err_dev_free; } ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; /* Device initialization */ rc = ena_device_init(adapter, pdev, &get_feat_ctx, &adapter->wd_active); if (rc) { device_printf(pdev, "ENA device init failed! (err: %d)\n", rc); rc = ENXIO; goto err_bus_free; } adapter->keep_alive_timestamp = getsbinuptime(); adapter->tx_offload_cap = get_feat_ctx.offload.tx; /* Make sure that the interface is not up */ adapter->up = false; memcpy(adapter->mac_addr, get_feat_ctx.dev_attr.mac_addr, ETHER_ADDR_LEN); adapter->small_copy_len = ENA_DEFAULT_SMALL_PACKET_LEN; /* calculate IO queue number to create */ io_queue_num = ena_calc_io_queue_num(adapter, &get_feat_ctx); ENA_ASSERT(io_queue_num > 0, "Invalid queue number: %d\n", io_queue_num); adapter->num_queues = io_queue_num; /* calculate ring sizes */ queue_size = ena_calc_queue_size(adapter, &tx_sgl_size, &rx_sgl_size, &get_feat_ctx); if ((queue_size <= 0) || (io_queue_num <= 0)) { rc = ENA_COM_FAULT; goto err_com_free; } adapter->tx_ring_size = queue_size; adapter->rx_ring_size = queue_size; adapter->max_tx_sgl_size = tx_sgl_size; adapter->max_rx_sgl_size = rx_sgl_size; /* set up dma tags for rx and tx buffers */ rc = ena_setup_tx_dma_tag(adapter); if (rc) goto dma_tx_err; rc = ena_setup_rx_dma_tag(adapter); if (rc) goto dma_rx_err; /* initialize rings basic information */ device_printf(pdev, "initialize %d io queues\n", io_queue_num); rc = ena_init_io_rings(adapter); if (rc) { device_printf(pdev,"Error with initialization of IO rings\n"); goto err_io_init; } /* setup network interface */ rc = ena_setup_ifnet(pdev, adapter, &get_feat_ctx); if (rc) { device_printf(pdev,"Error with network interface setup\n"); goto err_com_free; } rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num); if (rc) { device_printf(pdev, "Failed to enable and set the admin interrupts\n"); goto err_ifp_free; } + /* Initialize reset task queue */ + TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter); + adapter->reset_tq = taskqueue_create("ena_reset_enqueue", + M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq); + if (adapter->reset_tq == NULL) { + device_printf(adapter->pdev, + "Unable to create reset task queue\n"); + goto err_reset_tq; + } + taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, + "%s rstq", device_get_nameunit(adapter->pdev)); + + /* Initialize task queue responsible for updating hw stats */ + TASK_INIT(&adapter->stats_task, 0, ena_update_hw_stats, adapter); + adapter->stats_tq = taskqueue_create_fast("ena_stats_update", + M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->stats_tq); + if (adapter->stats_tq == NULL) { + device_printf(adapter->pdev, + "Unable to create taskqueue for updating hw stats\n"); + goto err_stats_tq; + } + taskqueue_start_threads(&adapter->stats_tq, 1, PI_REALTIME, + "%s stats tq", device_get_nameunit(adapter->pdev)); + /* Initialize statistics */ ena_alloc_counters((counter_u64_t *)&adapter->dev_stats, sizeof(struct ena_stats_dev)); ena_update_stats_counters(adapter); ena_sysctl_add_nodes(adapter); /* Tell the stack that the interface is not active */ if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); - /* Initialize reset task queue */ - TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter); - adapter->reset_tq = taskqueue_create("ena_reset_enqueue", - M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq); - 
taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, - "%s rstq", device_get_nameunit(adapter->pdev)); - adapter->running = true; return (0); +err_stats_tq: + taskqueue_free(adapter->reset_tq); +err_reset_tq: + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); err_ifp_free: if_detach(adapter->ifp); if_free(adapter->ifp); err_com_free: ena_free_all_io_rings_resources(adapter); err_io_init: ena_free_rx_dma_tag(adapter); dma_rx_err: ena_free_tx_dma_tag(adapter); dma_tx_err: ena_com_admin_destroy(ena_dev); ena_com_delete_host_info(ena_dev); err_bus_free: free(ena_dev->bus, M_DEVBUF); err_dev_free: free(ena_dev, M_DEVBUF); err_select_region: ena_free_pci_resources(adapter); err_pci_res: return (rc); } /** * ena_detach - Device Removal Routine * @pdev: device information struct * * ena_detach is called by the device subsystem to alert the driver * that it should release a PCI device. **/ static int ena_detach(device_t pdev) { struct ena_adapter *adapter = device_get_softc(pdev); struct ena_com_dev *ena_dev = adapter->ena_dev; int rc; /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(adapter->pdev ,"VLAN is in use, detach first\n"); return (EBUSY); } /* Free reset task and callout */ callout_drain(&adapter->timer_service); while (taskqueue_cancel(adapter->reset_tq, &adapter->reset_task, NULL)) taskqueue_drain(adapter->reset_tq, &adapter->reset_task); taskqueue_free(adapter->reset_tq); + sx_xlock(&adapter->ioctl_sx); ena_down(adapter); + sx_unlock(&adapter->ioctl_sx); + + taskqueue_free(adapter->stats_tq); if (adapter->ifp != NULL) { ether_ifdetach(adapter->ifp); if_free(adapter->ifp); } ena_free_all_io_rings_resources(adapter); ena_free_counters((counter_u64_t *)&adapter->dev_stats, sizeof(struct ena_stats_dev)); if (adapter->rss_support) ena_com_rss_destroy(ena_dev); rc = ena_free_rx_dma_tag(adapter); if (rc) device_printf(adapter->pdev, "Unmapped RX DMA tag associations\n"); rc = ena_free_tx_dma_tag(adapter); if (rc) device_printf(adapter->pdev, "Unmapped TX DMA tag associations\n"); /* Reset the device only if the device is running. 
*/ if (adapter->running) ena_com_dev_reset(ena_dev); ena_com_delete_host_info(ena_dev); ena_com_admin_destroy(ena_dev); ena_free_irqs(adapter); ena_com_mmio_reg_read_request_destroy(ena_dev); ena_free_pci_resources(adapter); mtx_destroy(&adapter->global_mtx); sx_destroy(&adapter->ioctl_sx); if (ena_dev->bus != NULL) free(ena_dev->bus, M_DEVBUF); if (ena_dev != NULL) free(ena_dev, M_DEVBUF); return (bus_generic_detach(pdev)); } /****************************************************************************** ******************************** AENQ Handlers ******************************* *****************************************************************************/ /** * ena_update_on_link_change: * Notify the network interface about the change in link status **/ static void ena_update_on_link_change(void *adapter_data, struct ena_admin_aenq_entry *aenq_e) { struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; struct ena_admin_aenq_link_change_desc *aenq_desc; int status; if_t ifp; aenq_desc = (struct ena_admin_aenq_link_change_desc *)aenq_e; ifp = adapter->ifp; status = aenq_desc->flags & ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK; if (status != 0) { device_printf(adapter->pdev, "link is UP\n"); if_link_state_change(ifp, LINK_STATE_UP); } else if (status == 0) { device_printf(adapter->pdev, "link is DOWN\n"); if_link_state_change(ifp, LINK_STATE_DOWN); } else { device_printf(adapter->pdev, "invalid value received\n"); BUG(); } adapter->link_status = status; return; } /** * This handler will be called for an unknown event group or unimplemented handlers **/ static void unimplemented_aenq_handler(void *data, struct ena_admin_aenq_entry *aenq_e) { return; } static struct ena_aenq_handlers aenq_handlers = { .handlers = { [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd, }, .unimplemented_handler = unimplemented_aenq_handler }; /********************************************************************* * FreeBSD Device Interface Entry Points *********************************************************************/ static device_method_t ena_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ena_probe), DEVMETHOD(device_attach, ena_attach), DEVMETHOD(device_detach, ena_detach), DEVMETHOD_END }; static driver_t ena_driver = { "ena", ena_methods, sizeof(struct ena_adapter), }; devclass_t ena_devclass; DRIVER_MODULE(ena, pci, ena_driver, ena_devclass, 0, 0); MODULE_DEPEND(ena, pci, 1, 1, 1); MODULE_DEPEND(ena, ether, 1, 1, 1); /*********************************************************************/ Index: projects/clang500-import/sys/dev/ena/ena.h =================================================================== --- projects/clang500-import/sys/dev/ena/ena.h (revision 319250) +++ projects/clang500-import/sys/dev/ena/ena.h (revision 319251) @@ -1,434 +1,440 @@ /*- * BSD LICENSE * * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef ENA_H #define ENA_H #include #include "ena-com/ena_com.h" #include "ena-com/ena_eth_com.h" #define DRV_MODULE_VER_MAJOR 0 #define DRV_MODULE_VER_MINOR 7 #define DRV_MODULE_VER_SUBMINOR 0 #define DRV_MODULE_NAME "ena" #ifndef DRV_MODULE_VERSION #define DRV_MODULE_VERSION \ __XSTRING(DRV_MODULE_VER_MAJOR) "." \ __XSTRING(DRV_MODULE_VER_MINOR) "." \ __XSTRING(DRV_MODULE_VER_SUBMINOR) #endif #define DEVICE_NAME "Elastic Network Adapter (ENA)" #define DEVICE_DESC "ENA adapter" /* Calculate DMA mask - width for ena cannot exceed 48, so it is safe */ #define ENA_DMA_BIT_MASK(x) ((1ULL << (x)) - 1ULL) /* 1 for AENQ + ADMIN */ #define ENA_MAX_MSIX_VEC(io_queues) (1 + (io_queues)) #define ENA_REG_BAR 0 #define ENA_MEM_BAR 2 #define ENA_BUS_DMA_SEGS 32 #define ENA_DEFAULT_RING_SIZE 1024 #define ENA_DEFAULT_SMALL_PACKET_LEN 128 #define ENA_DEFAULT_MAX_RX_BUFF_ALLOC_SIZE 1536 #define ENA_RX_REFILL_THRESH_DEVIDER 8 #define ENA_MAX_PUSH_PKT_SIZE 128 #define ENA_NAME_MAX_LEN 20 #define ENA_IRQNAME_SIZE 40 #define ENA_PKT_MAX_BUFS 19 #define ENA_STALL_TIMEOUT 100 #define ENA_RX_RSS_TABLE_LOG_SIZE 7 #define ENA_RX_RSS_TABLE_SIZE (1 << ENA_RX_RSS_TABLE_LOG_SIZE) #define ENA_HASH_KEY_SIZE 40 #define ENA_DMA_BITS_MASK 40 #define ENA_MAX_FRAME_LEN 10000 #define ENA_MIN_FRAME_LEN 60 #define ENA_RX_HASH_KEY_NUM 10 #define ENA_RX_THASH_TABLE_SIZE (1 << 8) #define ENA_TX_CLEANUP_TRESHOLD 128 #define DB_THRESHOLD 64 #define TX_COMMIT 32 /* * TX budget for cleaning. It should be half of the RX budget to reduce amount * of TCP retransmissions. */ #define TX_BUDGET 128 /* RX cleanup budget. -1 stands for infinity. */ #define RX_BUDGET 256 /* * How many times we can repeat cleanup in the io irq handling routine if the * RX or TX budget was depleted. */ #define CLEAN_BUDGET 8 #define RX_IRQ_INTERVAL 20 #define TX_IRQ_INTERVAL 50 #define ENA_MAX_MTU 9216 #define ENA_TSO_MAXSIZE PAGE_SIZE #define ENA_TSO_NSEGS ENA_PKT_MAX_BUFS #define ENA_RX_OFFSET NET_SKB_PAD + NET_IP_ALIGN #define ENA_MMIO_DISABLE_REG_READ BIT(0) #define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) #define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) #define ENA_RX_RING_IDX_ADD(idx, n, ring_size) \ (((idx) + (n)) & ((ring_size) - 1)) #define ENA_IO_TXQ_IDX(q) (2 * (q)) #define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) #define ENA_MGMNT_IRQ_IDX 0 #define ENA_IO_IRQ_FIRST_IDX 1 #define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q)) /* * ENA device should send keep alive msg every 1 sec. * We wait for 6 sec just to be on the safe side. */ #define DEFAULT_KEEP_ALIVE_TO (SBT_1S * 6) /* Time in jiffies before concluding the transmitter is hung. 
*/ #define DEFAULT_TX_CMP_TO (SBT_1S * 5) /* Number of TX queues to check for missing completions per timer tick */ #define DEFAULT_TX_MONITORED_QUEUES (4) /* Max number of timed-out packets before device reset */ #define DEFAULT_TX_CMP_THRESHOLD (128) /* * Supported PCI vendor and device IDs */ #define PCI_VENDOR_ID_AMAZON 0x1d0f #define PCI_DEV_ID_ENA_PF 0x0ec2 #define PCI_DEV_ID_ENA_LLQ_PF 0x1ec2 #define PCI_DEV_ID_ENA_VF 0xec20 #define PCI_DEV_ID_ENA_LLQ_VF 0xec21 struct msix_entry { int entry; int vector; }; typedef struct _ena_vendor_info_t { unsigned int vendor_id; unsigned int device_id; unsigned int index; } ena_vendor_info_t; struct ena_irq { /* Interrupt resources */ struct resource *res; driver_intr_t *handler; void *data; void *cookie; unsigned int vector; bool requested; int cpu; char name[ENA_IRQNAME_SIZE]; }; struct ena_que { struct ena_adapter *adapter; struct ena_ring *tx_ring; struct ena_ring *rx_ring; uint32_t id; int cpu; }; struct ena_tx_buffer { struct mbuf *mbuf; /* # of ena desc for this specific mbuf * (includes data desc and metadata desc) */ unsigned int tx_descs; /* # of buffers used by this mbuf */ unsigned int num_of_bufs; bus_dmamap_t map; /* Used to detect missing tx packets */ struct bintime timestamp; bool print_once; struct ena_com_buf bufs[ENA_PKT_MAX_BUFS]; } __aligned(CACHE_LINE_SIZE); struct ena_rx_buffer { struct mbuf *mbuf; bus_dmamap_t map; struct ena_com_buf ena_buf; } __aligned(CACHE_LINE_SIZE); struct ena_stats_tx { counter_u64_t cnt; counter_u64_t bytes; counter_u64_t queue_stop; counter_u64_t prepare_ctx_err; counter_u64_t queue_wakeup; counter_u64_t dma_mapping_err; /* Not counted */ counter_u64_t unsupported_desc_num; /* Not counted */ counter_u64_t napi_comp; /* Not counted */ counter_u64_t tx_poll; counter_u64_t doorbells; counter_u64_t missing_tx_comp; counter_u64_t bad_req_id; + counter_u64_t defragment; + counter_u64_t defragment_err; }; struct ena_stats_rx { counter_u64_t cnt; counter_u64_t bytes; counter_u64_t refil_partial; counter_u64_t bad_csum; /* Not counted */ counter_u64_t page_alloc_fail; counter_u64_t mbuf_alloc_fail; counter_u64_t dma_mapping_err; counter_u64_t bad_desc_num; /* Not counted */ counter_u64_t small_copy_len_pkt; }; struct ena_ring { /* Holds the empty requests for TX out-of-order completions */ uint16_t *free_tx_ids; struct ena_com_dev *ena_dev; struct ena_adapter *adapter; struct ena_com_io_cq *ena_com_io_cq; struct ena_com_io_sq *ena_com_io_sq; /* The maximum length the driver can push to the device (For LLQ) */ enum ena_admin_placement_policy_type tx_mem_queue_type; uint16_t rx_small_copy_len; uint16_t qid; uint16_t mtu; uint8_t tx_max_header_size; struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS]; uint32_t smoothed_interval; enum ena_intr_moder_level moder_tbl_idx; struct ena_que *que; struct lro_ctrl lro; uint16_t next_to_use; uint16_t next_to_clean; union { struct ena_tx_buffer *tx_buffer_info; /* context of tx packet */ struct ena_rx_buffer *rx_buffer_info; /* context of rx packet */ }; int ring_size; /* number of tx/rx_buffer_info's entries */ struct buf_ring *br; /* only for TX */ struct mtx ring_mtx; char mtx_name[16]; struct task enqueue_task; struct taskqueue *enqueue_tq; struct task cmpl_task; struct taskqueue *cmpl_tq; union { struct ena_stats_tx tx_stats; struct ena_stats_rx rx_stats; }; } __aligned(CACHE_LINE_SIZE); struct ena_stats_dev { /* Not counted */ counter_u64_t tx_timeout; /* Not counted */ counter_u64_t io_suspend; /* Not counted */ counter_u64_t io_resume; /* Not counted */ counter_u64_t
wd_expired; counter_u64_t interface_up; counter_u64_t interface_down; /* Not counted */ counter_u64_t admin_q_pause; }; struct ena_hw_stats { uint64_t rx_packets; uint64_t tx_packets; uint64_t rx_bytes; uint64_t tx_bytes; uint64_t rx_drops; }; /* Board specific private data structure */ struct ena_adapter { struct ena_com_dev *ena_dev; /* OS defined structs */ if_t ifp; device_t pdev; struct ifmedia media; /* OS resources */ struct resource * memory; struct resource * registers; struct mtx global_mtx; struct sx ioctl_sx; /* MSI-X */ uint32_t msix_enabled; struct msix_entry *msix_entries; int msix_vecs; /* DMA tags used throughout the driver adapter for Tx and Rx */ bus_dma_tag_t tx_buf_tag; bus_dma_tag_t rx_buf_tag; int dma_width; /* * RX packets shorter than this length will be copied into the mbuf * header */ unsigned int small_copy_len; uint16_t max_tx_sgl_size; uint16_t max_rx_sgl_size; uint32_t tx_offload_cap; /* Tx fast path data */ int num_queues; unsigned int tx_usecs, rx_usecs; /* Interrupt coalescing */ unsigned int tx_ring_size; unsigned int rx_ring_size; /* RSS */ uint8_t rss_ind_tbl[ENA_RX_RSS_TABLE_SIZE]; bool rss_support; uint32_t msg_enable; uint8_t mac_addr[ETHER_ADDR_LEN]; /* mdio and phy */ char name[ENA_NAME_MAX_LEN]; bool link_status; bool trigger_reset; bool up; bool running; uint32_t wol; /* Queue will represent one TX and one RX ring */ struct ena_que que[ENA_MAX_NUM_IO_QUEUES] __aligned(CACHE_LINE_SIZE); /* TX */ struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES] __aligned(CACHE_LINE_SIZE); /* RX */ struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES] __aligned(CACHE_LINE_SIZE); struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)]; /* Timer service */ struct callout timer_service; sbintime_t keep_alive_timestamp; uint32_t next_monitored_tx_qid; struct task reset_task; struct taskqueue *reset_tq; int wd_active; sbintime_t keep_alive_timeout; sbintime_t missing_tx_timeout; uint32_t missing_tx_max_queues; uint32_t missing_tx_threshold; + + /* Task updating hw stats */ + struct task stats_task; + struct taskqueue *stats_tq; /* Statistics */ struct ena_stats_dev dev_stats; struct ena_hw_stats hw_stats; }; #define ENA_DEV_LOCK mtx_lock(&adapter->global_mtx) #define ENA_DEV_UNLOCK mtx_unlock(&adapter->global_mtx) #define ENA_RING_MTX_LOCK(_ring) mtx_lock(&(_ring)->ring_mtx) #define ENA_RING_MTX_TRYLOCK(_ring) mtx_trylock(&(_ring)->ring_mtx) #define ENA_RING_MTX_UNLOCK(_ring) mtx_unlock(&(_ring)->ring_mtx) struct ena_dev *ena_efa_enadev_get(device_t pdev); int ena_register_adapter(struct ena_adapter *adapter); void ena_unregister_adapter(struct ena_adapter *adapter); int ena_update_stats_counters(struct ena_adapter *adapter); static inline int ena_mbuf_count(struct mbuf *mbuf) { int count = 1; while ((mbuf = mbuf->m_next) != NULL) ++count; return count; } #endif /* !(ENA_H) */ Index: projects/clang500-import/sys/dev/ena/ena_sysctl.c =================================================================== --- projects/clang500-import/sys/dev/ena/ena_sysctl.c (revision 319250) +++ projects/clang500-import/sys/dev/ena/ena_sysctl.c (revision 319251) @@ -1,243 +1,251 @@ /*- * BSD LICENSE * * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "ena_sysctl.h" static int ena_sysctl_update_stats(SYSCTL_HANDLER_ARGS); static void ena_sysctl_add_stats(struct ena_adapter *); void ena_sysctl_add_nodes(struct ena_adapter *adapter) { ena_sysctl_add_stats(adapter); } static void ena_sysctl_add_stats(struct ena_adapter *adapter) { device_t dev; struct ena_ring *tx_ring; struct ena_ring *rx_ring; struct ena_hw_stats *hw_stats; struct ena_stats_dev *dev_stats; struct ena_stats_tx *tx_stats; struct ena_stats_rx *rx_stats; struct ena_com_stats_admin *admin_stats; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node; struct sysctl_oid *admin_node; struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list; struct sysctl_oid_list *admin_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; int i; dev = adapter->pdev; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); tx_ring = adapter->tx_ring; rx_ring = adapter->rx_ring; hw_stats = &adapter->hw_stats; dev_stats = &adapter->dev_stats; admin_stats = &adapter->ena_dev->admin_queue.stats; SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "tx_timeout", CTLFLAG_RD, &dev_stats->tx_timeout, "Driver TX timeouts"); SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "io_suspend", CTLFLAG_RD, &dev_stats->io_suspend, "IO queue suspends"); SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "io_resume", CTLFLAG_RD, &dev_stats->io_resume, "IO queue resumes"); SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD, &dev_stats->wd_expired, "Watchdog expiry count"); SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD, &dev_stats->interface_up, "Network interface up count"); SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", CTLFLAG_RD, &dev_stats->interface_down, "Network interface down count"); SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", CTLFLAG_RD, &dev_stats->admin_q_pause, "Admin queue pauses"); for (i = 0; i < adapter->num_queues; ++i, ++tx_ring, ++rx_ring) { snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); /* TX specific stats */ tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring", CTLFLAG_RD, NULL, "TX ring"); tx_list = SYSCTL_CHILDREN(tx_node); tx_stats = &tx_ring->tx_stats; SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count", CTLFLAG_RD, 
&tx_stats->cnt, "Packets sent"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes", CTLFLAG_RD, &tx_stats->bytes, "Bytes sent"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err, "TX buffer preparation failures"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeup", CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err, "DMA mapping failures"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "unsupported_desc_num", CTLFLAG_RD, &tx_stats->unsupported_desc_num, "Excessive descriptor packet discards"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "napi_comp", CTLFLAG_RD, &tx_stats->napi_comp, "Napi completions"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "tx_poll", CTLFLAG_RD, &tx_stats->tx_poll, "TX poll count"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells", CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp, "TX completions missed"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id", CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count"); SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "stops", CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops"); + SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, + "defragmentations", CTLFLAG_RD, + &tx_stats->defragment, + "Mbuf defragmentations"); + SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, + "defragmentation_err", CTLFLAG_RD, + &tx_stats->defragment_err, + "Mbuf defragmentation failures"); /* RX specific stats */ rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring", CTLFLAG_RD, NULL, "RX ring"); rx_list = SYSCTL_CHILDREN(rx_node); rx_stats = &rx_ring->rx_stats; SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count", CTLFLAG_RD, &rx_stats->cnt, "Packets received"); SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes", CTLFLAG_RD, &rx_stats->bytes, "Bytes received"); SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial", CTLFLAG_RD, &rx_stats->refil_partial, "Partial refilled mbufs"); SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_csum", CTLFLAG_RD, &rx_stats->bad_csum, "Bad RX checksum"); SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "page_alloc_fail", CTLFLAG_RD, &rx_stats->page_alloc_fail, "Failed page allocs"); SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs"); SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err, "DMA mapping errors"); SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num", CTLFLAG_RD, &rx_stats->bad_desc_num, "Bad descriptor count"); SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "small_copy_len_pkt", CTLFLAG_RD, &rx_stats->small_copy_len_pkt, "Small copy packet count"); } /* Stats read from device */ hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats", CTLFLAG_RD, NULL, "Statistics from hardware"); hw_list = SYSCTL_CHILDREN(hw_node); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &hw_stats->rx_packets, 0, "Packets received"); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &hw_stats->tx_packets, 0, "Packets transmitted"); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &hw_stats->rx_bytes, 0, "Bytes received"); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &hw_stats->tx_bytes, 0, "Bytes 
transmitted"); SYSCTL_ADD_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD, &hw_stats->rx_drops, 0, "Receive packet drops"); SYSCTL_ADD_PROC(ctx, hw_list, OID_AUTO, "update_stats", CTLTYPE_INT|CTLFLAG_RD, adapter, 0, ena_sysctl_update_stats, "A", "Update stats from hardware"); /* ENA Admin queue stats */ admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats", CTLFLAG_RD, NULL, "ENA Admin Queue statistics"); admin_list = SYSCTL_CHILDREN(admin_node); SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD, &admin_stats->aborted_cmd, 0, "Aborted commands"); SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD, &admin_stats->submitted_cmd, 0, "Submitted commands"); SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD, &admin_stats->completed_cmd, 0, "Completed commands"); SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD, &admin_stats->out_of_space, 0, "Queue out of space"); SYSCTL_ADD_U32(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD, &admin_stats->no_completion, 0, "Commands not completed"); } static int ena_sysctl_update_stats(SYSCTL_HANDLER_ARGS) { struct ena_adapter *adapter = (struct ena_adapter *)arg1; int rc; if (adapter->up) ena_update_stats_counters(adapter); rc = sysctl_handle_string(oidp, "", 1, req); return (rc); } Index: projects/clang500-import/sys/kern/kern_mutex.c =================================================================== --- projects/clang500-import/sys/kern/kern_mutex.c (revision 319250) +++ projects/clang500-import/sys/kern/kern_mutex.c (revision 319251) @@ -1,1182 +1,1182 @@ /*- * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ */ /* * Machine independent bits of mutex implementation. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_adaptive_mutexes.h" #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES) #define ADAPTIVE_MUTEXES #endif #ifdef HWPMC_HOOKS #include PMC_SOFT_DEFINE( , , lock, failed); #endif /* * Return the mutex address when the lock cookie address is provided. * This functionality assumes that struct mtx* have a member named mtx_lock. */ #define mtxlock2mtx(c) (__containerof(c, struct mtx, mtx_lock)) /* * Internal utility macros. */ #define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) #define mtx_destroyed(m) ((m)->mtx_lock == MTX_DESTROYED) static void assert_mtx(const struct lock_object *lock, int what); #ifdef DDB static void db_show_mtx(const struct lock_object *lock); #endif static void lock_mtx(struct lock_object *lock, uintptr_t how); static void lock_spin(struct lock_object *lock, uintptr_t how); #ifdef KDTRACE_HOOKS static int owner_mtx(const struct lock_object *lock, struct thread **owner); #endif static uintptr_t unlock_mtx(struct lock_object *lock); static uintptr_t unlock_spin(struct lock_object *lock); /* * Lock classes for sleep and spin mutexes. */ struct lock_class lock_class_mtx_sleep = { .lc_name = "sleep mutex", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_mtx, .lc_unlock = unlock_mtx, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; struct lock_class lock_class_mtx_spin = { .lc_name = "spin mutex", .lc_flags = LC_SPINLOCK | LC_RECURSABLE, .lc_assert = assert_mtx, #ifdef DDB .lc_ddb_show = db_show_mtx, #endif .lc_lock = lock_spin, .lc_unlock = unlock_spin, #ifdef KDTRACE_HOOKS .lc_owner = owner_mtx, #endif }; #ifdef ADAPTIVE_MUTEXES static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging"); static struct lock_delay_config __read_mostly mtx_delay; SYSCTL_INT(_debug_mtx, OID_AUTO, delay_base, CTLFLAG_RW, &mtx_delay.base, 0, ""); SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max, 0, ""); LOCK_DELAY_SYSINIT_DEFAULT(mtx_delay); #endif static SYSCTL_NODE(_debug, OID_AUTO, mtx_spin, CTLFLAG_RD, NULL, "mtx spin debugging"); static struct lock_delay_config __read_mostly mtx_spin_delay; SYSCTL_INT(_debug_mtx_spin, OID_AUTO, delay_base, CTLFLAG_RW, &mtx_spin_delay.base, 0, ""); SYSCTL_INT(_debug_mtx_spin, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_spin_delay.max, 0, ""); LOCK_DELAY_SYSINIT_DEFAULT(mtx_spin_delay); /* * System-wide mutexes */ struct mtx blocked_lock; struct mtx Giant; void assert_mtx(const struct lock_object *lock, int what) { mtx_assert((const struct mtx *)lock, what); } void lock_mtx(struct lock_object *lock, uintptr_t how) { mtx_lock((struct mtx *)lock); } void lock_spin(struct lock_object *lock, uintptr_t how) { panic("spin locks can only use msleep_spin"); } uintptr_t unlock_mtx(struct lock_object *lock) { struct mtx *m; m = (struct mtx *)lock; mtx_assert(m, MA_OWNED | MA_NOTRECURSED); mtx_unlock(m); return (0); } uintptr_t unlock_spin(struct lock_object *lock) { panic("spin locks can only use msleep_spin"); } #ifdef KDTRACE_HOOKS int owner_mtx(const struct lock_object *lock, struct thread **owner) { const struct mtx *m; uintptr_t x; m = (const struct mtx *)lock; x = m->mtx_lock; *owner = (struct thread 
*)(x & ~MTX_FLAGMASK); return (x != MTX_UNOWNED); } #endif /* * Function versions of the inlined __mtx_* macros. These are used by * modules and can also be called from assembly language if needed. */ void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; uintptr_t tid, v; m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("mtx_lock() by idle thread %p on sleep mutex %s @ %s:%d", curthread, m->lock_object.lo_name, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_lock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); tid = (uintptr_t)curthread; v = MTX_UNOWNED; if (!_mtx_obtain_lock_fetch(m, &v, tid)) _mtx_lock_sleep(m, v, tid, opts, file, line); else LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, 0, 0, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); } void __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_unlock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); #ifdef LOCK_PROFILING __mtx_unlock_sleep(c, opts, file, line); #else __mtx_unlock(m, curthread, opts, file, line); #endif TD_LOCKS_DEC(curthread); } void __mtx_lock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_lock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_lock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0, ("mtx_lock_spin: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); opts &= ~MTX_RECURSE; WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); __mtx_lock_spin(m, curthread, opts, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); } int __mtx_trylock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return (1); m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_trylock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); KASSERT((opts & MTX_RECURSE) == 0, ("mtx_trylock_spin: unsupp. 
opt MTX_RECURSE on mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); if (__mtx_trylock_spin(m, curthread, opts, file, line)) { LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 1, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); return (1); } LOCK_LOG_TRY("LOCK", &m->lock_object, opts, 0, file, line); return (0); } void __mtx_unlock_spin_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("mtx_unlock_spin() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); WITNESS_UNLOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("UNLOCK", &m->lock_object, opts, m->mtx_recurse, file, line); mtx_assert(m, MA_OWNED); __mtx_unlock_spin(m); } /* * The important part of mtx_trylock{,_flags}() * Tries to acquire lock `m.' If this function is called on a mutex that * is already owned, it will recursively acquire the lock. */ int _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; struct thread *td; uintptr_t tid, v; #ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; #endif int rval; bool recursed; td = curthread; tid = (uintptr_t)td; if (SCHEDULER_STOPPED_TD(td)) return (1); m = mtxlock2mtx(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), ("mtx_trylock() by idle thread %p on sleep mutex %s @ %s:%d", curthread, m->lock_object.lo_name, file, line)); KASSERT(m->mtx_lock != MTX_DESTROYED, ("mtx_trylock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_sleep, ("mtx_trylock() of spin mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); rval = 1; recursed = false; v = MTX_UNOWNED; for (;;) { if (_mtx_obtain_lock_fetch(m, &v, tid)) break; if (v == MTX_UNOWNED) continue; if (v == tid && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0)) { - m->mtx_recurse++; - atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); - recursed = true; - break; + m->mtx_recurse++; + atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); + recursed = true; + break; } rval = 0; break; } opts &= ~MTX_RECURSE; LOCK_LOG_TRY("LOCK", &m->lock_object, opts, rval, file, line); if (rval) { WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); TD_LOCKS_INC(curthread); if (!recursed) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested, waittime, file, line); } return (rval); } /* * __mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock. * * We call this if the lock is either contested (i.e. we need to go to * sleep waiting for it), or if we need to recurse on it. 
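 *
 * Recursion is only legal if the mutex was initialized with
 * MTX_RECURSE or the caller passes MTX_RECURSE in opts.  Sketch,
 * illustrative only:
 *
 *	mtx_init(&m, "rec", NULL, MTX_DEF | MTX_RECURSE);
 *	mtx_lock(&m);
 *	mtx_lock(&m);		recurses instead of self-deadlocking
 *	mtx_unlock(&m);
 *	mtx_unlock(&m);		the second unlock releases the lock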
*/ #if LOCK_DEBUG > 0 void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, uintptr_t tid, int opts, const char *file, int line) #else void __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, uintptr_t tid) #endif { struct mtx *m; struct turnstile *ts; #ifdef ADAPTIVE_MUTEXES volatile struct thread *owner; #endif #ifdef KTR int cont_logged = 0; #endif #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) struct lock_delay_arg lda; #endif #ifdef KDTRACE_HOOKS u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) int doing_lockprof; #endif if (SCHEDULER_STOPPED()) return; #if defined(ADAPTIVE_MUTEXES) lock_delay_arg_init(&lda, &mtx_delay); #elif defined(KDTRACE_HOOKS) lock_delay_arg_init(&lda, NULL); #endif m = mtxlock2mtx(c); if (__predict_false(v == MTX_UNOWNED)) v = MTX_READ_VALUE(m); if (__predict_false(lv_mtx_owner(v) == (struct thread *)tid)) { KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0, ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); #if LOCK_DEBUG > 0 opts &= ~MTX_RECURSE; #endif m->mtx_recurse++; atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m); return; } #if LOCK_DEBUG > 0 opts &= ~MTX_RECURSE; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR4(KTR_LOCK, "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", m->lock_object.lo_name, (void *)m->mtx_lock, file, line); #ifdef LOCK_PROFILING doing_lockprof = 1; #elif defined(KDTRACE_HOOKS) doing_lockprof = lockstat_enabled; if (__predict_false(doing_lockprof)) all_time -= lockstat_nsecs(&m->lock_object); #endif for (;;) { if (v == MTX_UNOWNED) { if (_mtx_obtain_lock_fetch(m, &v, tid)) break; continue; } #ifdef KDTRACE_HOOKS lda.spin_cnt++; #endif #ifdef ADAPTIVE_MUTEXES /* * If the owner is running on another CPU, spin until the * owner stops running or the state of the lock changes. */ owner = lv_mtx_owner(v); if (TD_IS_RUNNING(owner)) { if (LOCK_LOG_TEST(&m->lock_object, 0)) CTR3(KTR_LOCK, "%s: spinning on %p held by %p", __func__, m, owner); KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "spinning", "lockname:\"%s\"", m->lock_object.lo_name); do { lock_delay(&lda); v = MTX_READ_VALUE(m); owner = lv_mtx_owner(v); } while (v != MTX_UNOWNED && TD_IS_RUNNING(owner)); KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); continue; } #endif ts = turnstile_trywait(&m->lock_object); v = MTX_READ_VALUE(m); /* * Check if the lock has been released while spinning for * the turnstile chain lock. */ if (v == MTX_UNOWNED) { turnstile_cancel(ts); continue; } #ifdef ADAPTIVE_MUTEXES /* * The current lock owner might have started executing * on another CPU (or the lock could have changed * owners) while we were waiting on the turnstile * chain lock. If so, drop the turnstile lock and try * again. */ owner = lv_mtx_owner(v); if (TD_IS_RUNNING(owner)) { turnstile_cancel(ts); continue; } #endif /* * If the mutex isn't already contested and a failure occurs * setting the contested bit, the mutex was either released * or the state of the MTX_RECURSED bit changed. 
*/ if ((v & MTX_CONTESTED) == 0 && !atomic_cmpset_ptr(&m->mtx_lock, v, v | MTX_CONTESTED)) { turnstile_cancel(ts); v = MTX_READ_VALUE(m); continue; } /* * We definitely must sleep for this lock. */ mtx_assert(m, MA_NOTOWNED); #ifdef KTR if (!cont_logged) { CTR6(KTR_CONTENTION, "contention: %p at %s:%d wants %s, taken by %s:%d", (void *)tid, file, line, m->lock_object.lo_name, WITNESS_FILE(&m->lock_object), WITNESS_LINE(&m->lock_object)); cont_logged = 1; } #endif /* * Block on the turnstile. */ #ifdef KDTRACE_HOOKS sleep_time -= lockstat_nsecs(&m->lock_object); #endif turnstile_wait(ts, mtx_owner(m), TS_EXCLUSIVE_QUEUE); #ifdef KDTRACE_HOOKS sleep_time += lockstat_nsecs(&m->lock_object); sleep_cnt++; #endif v = MTX_READ_VALUE(m); } #ifdef KTR if (cont_logged) { CTR4(KTR_CONTENTION, "contention end: %s acquired by %p at %s:%d", m->lock_object.lo_name, (void *)tid, file, line); } #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return; #endif #ifdef KDTRACE_HOOKS all_time += lockstat_nsecs(&m->lock_object); #endif LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (sleep_time) LOCKSTAT_RECORD1(adaptive__block, m, sleep_time); /* * Only record the loops spinning and not sleeping. */ if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time); #endif } static void _mtx_lock_spin_failed(struct mtx *m) { struct thread *td; td = mtx_owner(m); /* If the mutex is unlocked, try again. */ if (td == NULL) return; printf( "spin lock %p (%s) held by %p (tid %d) too long\n", m, m->lock_object.lo_name, td, td->td_tid); #ifdef WITNESS witness_display_spinlock(&m->lock_object, td, printf); #endif panic("spin lock held too long"); } #ifdef SMP /* * _mtx_lock_spin_cookie: the tougher part of acquiring an MTX_SPIN lock. * * This is only called if we need to actually spin for the lock. Recursion * is handled inline. */ void _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, uintptr_t tid, int opts, const char *file, int line) { struct mtx *m; struct lock_delay_arg lda; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS int64_t spin_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) int doing_lockprof; #endif if (SCHEDULER_STOPPED()) return; lock_delay_arg_init(&lda, &mtx_spin_delay); m = mtxlock2mtx(c); if (__predict_false(v == MTX_UNOWNED)) v = MTX_READ_VALUE(m); if (__predict_false(v == tid)) { m->mtx_recurse++; return; } if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "spinning", "lockname:\"%s\"", m->lock_object.lo_name); #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); #ifdef LOCK_PROFILING doing_lockprof = 1; #elif defined(KDTRACE_HOOKS) doing_lockprof = lockstat_enabled; if (__predict_false(doing_lockprof)) spin_time -= lockstat_nsecs(&m->lock_object); #endif for (;;) { if (v == MTX_UNOWNED) { if (_mtx_obtain_lock_fetch(m, &v, tid)) break; continue; } /* Give interrupts a chance while we spin. 
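 *
 * (spinlock_exit() below leaves the critical section and re-enables
 * interrupts on this CPU while we busy-wait; the matching
 * spinlock_enter() after the loop restores the spinlock context
 * before the acquire is retried.)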
*/ spinlock_exit(); do { if (lda.spin_cnt < 10000000) { lock_delay(&lda); } else { lda.spin_cnt++; if (lda.spin_cnt < 60000000 || kdb_active || panicstr != NULL) DELAY(1); else _mtx_lock_spin_failed(m); cpu_spinwait(); } v = MTX_READ_VALUE(m); } while (v != MTX_UNOWNED); spinlock_enter(); } if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return; #endif #ifdef KDTRACE_HOOKS spin_time += lockstat_nsecs(&m->lock_object); #endif LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (spin_time != 0) LOCKSTAT_RECORD1(spin__spin, m, spin_time); #endif } #endif /* SMP */ void thread_lock_flags_(struct thread *td, int opts, const char *file, int line) { struct mtx *m; uintptr_t tid, v; struct lock_delay_arg lda; #ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS int64_t spin_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) int doing_lockprof = 1; #endif tid = (uintptr_t)curthread; if (SCHEDULER_STOPPED()) { /* * Ensure that spinlock sections are balanced even when the * scheduler is stopped, since we may otherwise inadvertently * re-enable interrupts while dumping core. */ spinlock_enter(); return; } lock_delay_arg_init(&lda, &mtx_spin_delay); #ifdef LOCK_PROFILING doing_lockprof = 1; #elif defined(KDTRACE_HOOKS) doing_lockprof = lockstat_enabled; if (__predict_false(doing_lockprof)) spin_time -= lockstat_nsecs(&td->td_lock->lock_object); #endif for (;;) { retry: v = MTX_UNOWNED; spinlock_enter(); m = td->td_lock; KASSERT(m->mtx_lock != MTX_DESTROYED, ("thread_lock() of destroyed mutex @ %s:%d", file, line)); KASSERT(LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin, ("thread_lock() of sleep mutex %s @ %s:%d", m->lock_object.lo_name, file, line)); if (mtx_owned(m)) KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0, ("thread_lock: recursed on non-recursive mutex %s @ %s:%d\n", m->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); for (;;) { if (_mtx_obtain_lock_fetch(m, &v, tid)) break; if (v == MTX_UNOWNED) continue; if (v == tid) { m->mtx_recurse++; break; } #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); /* Give interrupts a chance while we spin. 
*/ spinlock_exit(); do { if (lda.spin_cnt < 10000000) { lock_delay(&lda); } else { lda.spin_cnt++; if (lda.spin_cnt < 60000000 || kdb_active || panicstr != NULL) DELAY(1); else _mtx_lock_spin_failed(m); cpu_spinwait(); } if (m != td->td_lock) goto retry; v = MTX_READ_VALUE(m); } while (v != MTX_UNOWNED); spinlock_enter(); } if (m == td->td_lock) break; __mtx_unlock_spin(m); /* does spinlock_exit() */ } LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) if (__predict_true(!doing_lockprof)) return; #endif #ifdef KDTRACE_HOOKS spin_time += lockstat_nsecs(&m->lock_object); #endif if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m, contested, waittime, file, line); #ifdef KDTRACE_HOOKS if (spin_time != 0) LOCKSTAT_RECORD1(thread__spin, m, spin_time); #endif } struct mtx * thread_lock_block(struct thread *td) { struct mtx *lock; THREAD_LOCK_ASSERT(td, MA_OWNED); lock = td->td_lock; td->td_lock = &blocked_lock; mtx_unlock_spin(lock); return (lock); } void thread_lock_unblock(struct thread *td, struct mtx *new) { mtx_assert(new, MA_OWNED); MPASS(td->td_lock == &blocked_lock); atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new); } void thread_lock_set(struct thread *td, struct mtx *new) { struct mtx *lock; mtx_assert(new, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); lock = td->td_lock; td->td_lock = new; mtx_unlock_spin(lock); } /* * __mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock. * * We are only called here if the lock is recursed, contested (i.e. we * need to wake up a blocked thread) or a lockstat probe is active. */ #if LOCK_DEBUG > 0 void __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line) #else void __mtx_unlock_sleep(volatile uintptr_t *c) #endif { struct mtx *m; struct turnstile *ts; uintptr_t tid, v; if (SCHEDULER_STOPPED()) return; tid = (uintptr_t)curthread; m = mtxlock2mtx(c); v = MTX_READ_VALUE(m); if (v & MTX_RECURSED) { if (--(m->mtx_recurse) == 0) atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m); return; } LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, m); if (v == tid && _mtx_release_lock(m, tid)) return; /* * We have to lock the chain before the turnstile so this turnstile * can be removed from the hash list if it is empty. */ turnstile_chain_lock(&m->lock_object); ts = turnstile_lookup(&m->lock_object); if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); MPASS(ts != NULL); turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE); _mtx_release_lock_quick(m); /* * This turnstile is no longer associated with the mutex. We can * unlock the chain lock so a new turnstile may take its place. */ turnstile_unpend(ts, TS_EXCLUSIVE_LOCK); turnstile_chain_unlock(&m->lock_object); } /* * All the unlocking of MTX_SPIN locks is done inline. * See the __mtx_unlock_spin() macro for the details.
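 *
 * For reference, a typical spin mutex round trip looks like
 * (illustrative only):
 *
 *	mtx_init(&m, "intr lock", NULL, MTX_SPIN);
 *	mtx_lock_spin(&m);
 *	...code that must not be preempted...
 *	mtx_unlock_spin(&m);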
*/ /* * The backing function for the INVARIANTS-enabled mtx_assert() */ #ifdef INVARIANT_SUPPORT void __mtx_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct mtx *m; if (panicstr != NULL || dumping || SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); switch (what) { case MA_OWNED: case MA_OWNED | MA_RECURSED: case MA_OWNED | MA_NOTRECURSED: if (!mtx_owned(m)) panic("mutex %s not owned at %s:%d", m->lock_object.lo_name, file, line); if (mtx_recursed(m)) { if ((what & MA_NOTRECURSED) != 0) panic("mutex %s recursed at %s:%d", m->lock_object.lo_name, file, line); } else if ((what & MA_RECURSED) != 0) { panic("mutex %s unrecursed at %s:%d", m->lock_object.lo_name, file, line); } break; case MA_NOTOWNED: if (mtx_owned(m)) panic("mutex %s owned at %s:%d", m->lock_object.lo_name, file, line); break; default: panic("unknown mtx_assert at %s:%d", file, line); } } #endif /* * General init routine used by the MTX_SYSINIT() macro. */ void mtx_sysinit(void *arg) { struct mtx_args *margs = arg; mtx_init((struct mtx *)margs->ma_mtx, margs->ma_desc, NULL, margs->ma_opts); } /* * Mutex initialization routine; initialize lock `m', whose class (spin or * sleep) is determined by `opts', with options contained in `opts' and name * `name.' The optional lock type `type' is used as a general lock category * name for use with witness. */ void _mtx_init(volatile uintptr_t *c, const char *name, const char *type, int opts) { struct mtx *m; struct lock_class *class; int flags; m = mtxlock2mtx(c); MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE | MTX_NOWITNESS | MTX_DUPOK | MTX_NOPROFILE | MTX_NEW)) == 0); ASSERT_ATOMIC_LOAD_PTR(m->mtx_lock, ("%s: mtx_lock not aligned for %s: %p", __func__, name, &m->mtx_lock)); /* Determine lock class and lock flags. */ if (opts & MTX_SPIN) class = &lock_class_mtx_spin; else class = &lock_class_mtx_sleep; flags = 0; if (opts & MTX_QUIET) flags |= LO_QUIET; if (opts & MTX_RECURSE) flags |= LO_RECURSABLE; if ((opts & MTX_NOWITNESS) == 0) flags |= LO_WITNESS; if (opts & MTX_DUPOK) flags |= LO_DUPOK; if (opts & MTX_NOPROFILE) flags |= LO_NOPROFILE; if (opts & MTX_NEW) flags |= LO_NEW; /* Initialize mutex. */ lock_init(&m->lock_object, class, name, type, flags); m->mtx_lock = MTX_UNOWNED; m->mtx_recurse = 0; } /* * Destroy lock `m'. We don't allow MTX_QUIET to be * passed in as a flag here because if the corresponding mtx_init() was * called with MTX_QUIET set, then it will already be set in the mutex's * flags. */ void _mtx_destroy(volatile uintptr_t *c) { struct mtx *m; m = mtxlock2mtx(c); if (!mtx_owned(m)) MPASS(mtx_unowned(m)); else { MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0); /* Perform the non-mtx related part of mtx_unlock_spin(). */ if (LOCK_CLASS(&m->lock_object) == &lock_class_mtx_spin) spinlock_exit(); else TD_LOCKS_DEC(curthread); lock_profile_release_lock(&m->lock_object); /* Tell witness this isn't locked to make it happy. */ WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__, __LINE__); } m->mtx_lock = MTX_DESTROYED; lock_destroy(&m->lock_object); } /* * Initialize the mutex code and system mutexes. This is called from the MD * startup code prior to mi_startup(). The per-CPU data space needs to be * set up before this is called. */ void mutex_init(void) { /* Setup turnstiles so that sleep mutexes work. */ init_turnstiles(); /* * Initialize mutexes. */ mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); mtx_init(&blocked_lock, "blocked lock", NULL, MTX_SPIN); blocked_lock.mtx_lock = 0xdeadc0de; /* Always blocked.
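 *
 * (blocked_lock is never unlocked; it is a sentinel that
 * thread_lock_block() installs in td->td_lock so that concurrent
 * thread_lock() callers keep spinning until thread_lock_unblock()
 * publishes the real lock.)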
*/ mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN); mtx_init(&proc0.p_statmtx, "pstatl", NULL, MTX_SPIN); mtx_init(&proc0.p_itimmtx, "pitiml", NULL, MTX_SPIN); mtx_init(&proc0.p_profmtx, "pprofl", NULL, MTX_SPIN); mtx_init(&devmtx, "cdev", NULL, MTX_DEF); mtx_lock(&Giant); } #ifdef DDB void db_show_mtx(const struct lock_object *lock) { struct thread *td; const struct mtx *m; m = (const struct mtx *)lock; db_printf(" flags: {"); if (LOCK_CLASS(lock) == &lock_class_mtx_spin) db_printf("SPIN"); else db_printf("DEF"); if (m->lock_object.lo_flags & LO_RECURSABLE) db_printf(", RECURSE"); if (m->lock_object.lo_flags & LO_DUPOK) db_printf(", DUPOK"); db_printf("}\n"); db_printf(" state: {"); if (mtx_unowned(m)) db_printf("UNOWNED"); else if (mtx_destroyed(m)) db_printf("DESTROYED"); else { db_printf("OWNED"); if (m->mtx_lock & MTX_CONTESTED) db_printf(", CONTESTED"); if (m->mtx_lock & MTX_RECURSED) db_printf(", RECURSED"); } db_printf("}\n"); if (!mtx_unowned(m) && !mtx_destroyed(m)) { td = mtx_owner(m); db_printf(" owner: %p (tid %d, pid %d, \"%s\")\n", td, td->td_tid, td->td_proc->p_pid, td->td_name); if (mtx_recursed(m)) db_printf(" recursed: %d\n", m->mtx_recurse); } } #endif Index: projects/clang500-import/sys/netinet/ip_icmp.c =================================================================== --- projects/clang500-import/sys/netinet/ip_icmp.c (revision 319250) +++ projects/clang500-import/sys/netinet/ip_icmp.c (revision 319251) @@ -1,1036 +1,1034 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ /* * ICMP routines: error generation, receive packet processing, and * routines to turnaround packets back to the originator, and * host table maintenance routines. */ static VNET_DEFINE(int, icmplim) = 200; #define V_icmplim VNET(icmplim) SYSCTL_INT(_net_inet_icmp, ICMPCTL_ICMPLIM, icmplim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmplim), 0, "Maximum number of ICMP responses per second"); static VNET_DEFINE(int, icmplim_output) = 1; #define V_icmplim_output VNET(icmplim_output) SYSCTL_INT(_net_inet_icmp, OID_AUTO, icmplim_output, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmplim_output), 0, "Enable logging of ICMP response rate limiting"); #ifdef INET VNET_PCPUSTAT_DEFINE(struct icmpstat, icmpstat); VNET_PCPUSTAT_SYSINIT(icmpstat); SYSCTL_VNET_PCPUSTAT(_net_inet_icmp, ICMPCTL_STATS, stats, struct icmpstat, icmpstat, "ICMP statistics (struct icmpstat, netinet/icmp_var.h)"); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(icmpstat); #endif /* VIMAGE */ static VNET_DEFINE(int, icmpmaskrepl) = 0; #define V_icmpmaskrepl VNET(icmpmaskrepl) SYSCTL_INT(_net_inet_icmp, ICMPCTL_MASKREPL, maskrepl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmpmaskrepl), 0, "Reply to ICMP Address Mask Request packets"); static VNET_DEFINE(u_int, icmpmaskfake) = 0; #define V_icmpmaskfake VNET(icmpmaskfake) SYSCTL_UINT(_net_inet_icmp, OID_AUTO, maskfake, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmpmaskfake), 0, "Fake reply to ICMP Address Mask Request packets"); VNET_DEFINE(int, drop_redirect) = 0; #define V_drop_redirect VNET(drop_redirect) SYSCTL_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(drop_redirect), 0, "Ignore ICMP redirects"); static VNET_DEFINE(int, log_redirect) = 0; #define V_log_redirect VNET(log_redirect) SYSCTL_INT(_net_inet_icmp, OID_AUTO, log_redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(log_redirect), 0, "Log ICMP redirects to the console"); static VNET_DEFINE(char, reply_src[IFNAMSIZ]); #define V_reply_src VNET(reply_src) SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(reply_src), IFNAMSIZ, "ICMP reply source for non-local packets"); static VNET_DEFINE(int, icmp_rfi) = 0; #define V_icmp_rfi VNET(icmp_rfi) SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp_rfi), 0, "ICMP reply from incoming interface for non-local packets"); static VNET_DEFINE(int, icmp_quotelen) = 8; #define V_icmp_quotelen VNET(icmp_quotelen) SYSCTL_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp_quotelen), 0, "Number of bytes from original packet to quote in ICMP reply"); static VNET_DEFINE(int, icmpbmcastecho) = 0; #define V_icmpbmcastecho VNET(icmpbmcastecho) SYSCTL_INT(_net_inet_icmp, OID_AUTO, bmcastecho, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmpbmcastecho), 0, "Reply to multicast ICMP Echo Request and Timestamp packets"); static VNET_DEFINE(int, icmptstamprepl) = 1; #define V_icmptstamprepl VNET(icmptstamprepl) SYSCTL_INT(_net_inet_icmp, OID_AUTO, tstamprepl, CTLFLAG_RW, &VNET_NAME(icmptstamprepl), 0, "Respond to ICMP Timestamp packets"); #ifdef ICMPPRINTFS int icmpprintfs 
= 0; #endif static void icmp_reflect(struct mbuf *); static void icmp_send(struct mbuf *, struct mbuf *); extern struct protosw inetsw[]; /* * Kernel module interface for updating icmpstat. The argument is an index * into icmpstat treated as an array of u_long. While this encodes the * general layout of icmpstat into the caller, it doesn't encode its * location, so that future changes to add, for example, per-CPU stats * support won't cause binary compatibility problems for kernel modules. */ void kmod_icmpstat_inc(int statnum) { counter_u64_add(VNET(icmpstat)[statnum], 1); } /* * Generate an error packet of type error * in response to bad packet ip. */ void icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu) { struct ip *oip = mtod(n, struct ip *), *nip; unsigned oiphlen = oip->ip_hl << 2; struct icmp *icp; struct mbuf *m; unsigned icmplen, icmpelen, nlen; KASSERT((u_int)type <= ICMP_MAXTYPE, ("%s: illegal ICMP type", __func__)); #ifdef ICMPPRINTFS if (icmpprintfs) printf("icmp_error(%p, %x, %d)\n", oip, type, code); #endif if (type != ICMP_REDIRECT) ICMPSTAT_INC(icps_error); /* * Don't send error: * if the original packet was encrypted. * if not the first fragment of the message. * in response to a multicast or broadcast packet. * if the old packet protocol was an ICMP error message. */ if (n->m_flags & M_DECRYPTED) goto freeit; if (oip->ip_off & htons(~(IP_MF|IP_DF))) goto freeit; if (n->m_flags & (M_BCAST|M_MCAST)) goto freeit; if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT && n->m_len >= oiphlen + ICMP_MINLEN && !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiphlen))->icmp_type)) { ICMPSTAT_INC(icps_oldicmp); goto freeit; } /* Drop if IP header plus 8 bytes is not contiguous in first mbuf. */ if (oiphlen + 8 > n->m_len) goto freeit; /* * Calculate length to quote from original packet and * prevent the ICMP mbuf from overflowing. * Unfortunately this is non-trivial since ip_forward() * sends us truncated packets.
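 *
 * Worked example with the default net.inet.icmp.quotelen of 8: for a
 * quoted TCP segment with a 20 byte IP header and a 20 byte TCP
 * header, icmpelen = max(20, min(8, ip_len - 20)) = 20, so icmplen
 * below becomes min(20 + 20, nlen) -- the full TCP header is quoted
 * whenever it is present in the chain.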
*/ nlen = m_length(n, NULL); if (oip->ip_p == IPPROTO_TCP) { struct tcphdr *th; int tcphlen; if (oiphlen + sizeof(struct tcphdr) > n->m_len && n->m_next == NULL) goto stdreply; if (n->m_len < oiphlen + sizeof(struct tcphdr) && ((n = m_pullup(n, oiphlen + sizeof(struct tcphdr))) == NULL)) goto freeit; th = (struct tcphdr *)((caddr_t)oip + oiphlen); tcphlen = th->th_off << 2; if (tcphlen < sizeof(struct tcphdr)) goto freeit; if (ntohs(oip->ip_len) < oiphlen + tcphlen) goto freeit; if (oiphlen + tcphlen > n->m_len && n->m_next == NULL) goto stdreply; if (n->m_len < oiphlen + tcphlen && ((n = m_pullup(n, oiphlen + tcphlen)) == NULL)) goto freeit; icmpelen = max(tcphlen, min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen)); } else if (oip->ip_p == IPPROTO_SCTP) { struct sctphdr *sh; struct sctp_chunkhdr *ch; if (ntohs(oip->ip_len) < oiphlen + sizeof(struct sctphdr)) goto stdreply; if (oiphlen + sizeof(struct sctphdr) > n->m_len && n->m_next == NULL) goto stdreply; if (n->m_len < oiphlen + sizeof(struct sctphdr) && (n = m_pullup(n, oiphlen + sizeof(struct sctphdr))) == NULL) goto freeit; icmpelen = max(sizeof(struct sctphdr), min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen)); sh = (struct sctphdr *)((caddr_t)oip + oiphlen); if (ntohl(sh->v_tag) == 0 && ntohs(oip->ip_len) >= oiphlen + sizeof(struct sctphdr) + 8 && (n->m_len >= oiphlen + sizeof(struct sctphdr) + 8 || n->m_next != NULL)) { if (n->m_len < oiphlen + sizeof(struct sctphdr) + 8 && (n = m_pullup(n, oiphlen + sizeof(struct sctphdr) + 8)) == NULL) goto freeit; ch = (struct sctp_chunkhdr *)(sh + 1); if (ch->chunk_type == SCTP_INITIATION) { icmpelen = max(sizeof(struct sctphdr) + 8, min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen)); } } } else stdreply: icmpelen = max(8, min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen)); icmplen = min(oiphlen + icmpelen, nlen); if (icmplen < sizeof(struct ip)) goto freeit; if (MHLEN > sizeof(struct ip) + ICMP_MINLEN + icmplen) m = m_gethdr(M_NOWAIT, MT_DATA); else m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto freeit; #ifdef MAC mac_netinet_icmp_reply(n, m); #endif icmplen = min(icmplen, M_TRAILINGSPACE(m) - sizeof(struct ip) - ICMP_MINLEN); m_align(m, ICMP_MINLEN + icmplen); m->m_len = ICMP_MINLEN + icmplen; /* XXX MRT make the outgoing packet use the same FIB * that was associated with the incoming packet */ M_SETFIB(m, M_GETFIB(n)); icp = mtod(m, struct icmp *); ICMPSTAT_INC(icps_outhist[type]); icp->icmp_type = type; if (type == ICMP_REDIRECT) icp->icmp_gwaddr.s_addr = dest; else { icp->icmp_void = 0; /* * The following assignments assume an overlay with the * just zeroed icmp_void field. */ if (type == ICMP_PARAMPROB) { icp->icmp_pptr = code; code = 0; } else if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG && mtu) { icp->icmp_nextmtu = htons(mtu); } } icp->icmp_code = code; /* * Copy the quotation into the ICMP message and * convert quoted IP header back to network representation. */ m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip); nip = &icp->icmp_ip; /* * Set up ICMP message mbuf and copy old IP header (without options) * in front of the ICMP message. * If the original mbuf was meant to bypass the firewall, the error * reply should bypass as well.
m->m_flags |= n->m_flags & M_SKIP_FIREWALL; m->m_data -= sizeof(struct ip); m->m_len += sizeof(struct ip); m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = n->m_pkthdr.rcvif; nip = mtod(m, struct ip *); bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip)); nip->ip_len = htons(m->m_len); nip->ip_v = IPVERSION; nip->ip_hl = 5; nip->ip_p = IPPROTO_ICMP; nip->ip_tos = 0; nip->ip_off = 0; icmp_reflect(m); freeit: m_freem(n); } /* * Process a received ICMP message. */ int icmp_input(struct mbuf **mp, int *offp, int proto) { struct icmp *icp; struct in_ifaddr *ia; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct sockaddr_in icmpsrc, icmpdst, icmpgw; int hlen = *offp; int icmplen = ntohs(ip->ip_len) - *offp; int i, code; void (*ctlfunc)(int, struct sockaddr *, void *); int fibnum; *mp = NULL; /* * Locate the icmp structure in the mbuf, and check * that it is not corrupted and of at least minimum length. */ #ifdef ICMPPRINTFS if (icmpprintfs) { char srcbuf[INET_ADDRSTRLEN]; char dstbuf[INET_ADDRSTRLEN]; printf("icmp_input from %s to %s, len %d\n", inet_ntoa_r(ip->ip_src, srcbuf), inet_ntoa_r(ip->ip_dst, dstbuf), icmplen); } #endif if (icmplen < ICMP_MINLEN) { ICMPSTAT_INC(icps_tooshort); goto freeit; } i = hlen + min(icmplen, ICMP_ADVLENMIN); if (m->m_len < i && (m = m_pullup(m, i)) == NULL) { ICMPSTAT_INC(icps_tooshort); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); m->m_len -= hlen; m->m_data += hlen; icp = mtod(m, struct icmp *); if (in_cksum(m, icmplen)) { ICMPSTAT_INC(icps_checksum); goto freeit; } m->m_len += hlen; m->m_data -= hlen; #ifdef ICMPPRINTFS if (icmpprintfs) printf("icmp_input, type %d code %d\n", icp->icmp_type, icp->icmp_code); #endif /* * Message type specific processing. */ if (icp->icmp_type > ICMP_MAXTYPE) goto raw; /* Initialize */ bzero(&icmpsrc, sizeof(icmpsrc)); icmpsrc.sin_len = sizeof(struct sockaddr_in); icmpsrc.sin_family = AF_INET; bzero(&icmpdst, sizeof(icmpdst)); icmpdst.sin_len = sizeof(struct sockaddr_in); icmpdst.sin_family = AF_INET; bzero(&icmpgw, sizeof(icmpgw)); icmpgw.sin_len = sizeof(struct sockaddr_in); icmpgw.sin_family = AF_INET; ICMPSTAT_INC(icps_inhist[icp->icmp_type]); code = icp->icmp_code; switch (icp->icmp_type) { case ICMP_UNREACH: switch (code) { case ICMP_UNREACH_NET: case ICMP_UNREACH_HOST: case ICMP_UNREACH_SRCFAIL: case ICMP_UNREACH_NET_UNKNOWN: case ICMP_UNREACH_HOST_UNKNOWN: case ICMP_UNREACH_ISOLATED: case ICMP_UNREACH_TOSNET: case ICMP_UNREACH_TOSHOST: case ICMP_UNREACH_HOST_PRECEDENCE: case ICMP_UNREACH_PRECEDENCE_CUTOFF: code = PRC_UNREACH_NET; break; case ICMP_UNREACH_NEEDFRAG: code = PRC_MSGSIZE; break; /* * RFC 1122, Sections 3.2.2.1 and 4.2.3.9. * Treat subcodes 2,3 as immediate RST */ case ICMP_UNREACH_PROTOCOL: code = PRC_UNREACH_PROTOCOL; break; case ICMP_UNREACH_PORT: code = PRC_UNREACH_PORT; break; case ICMP_UNREACH_NET_PROHIB: case ICMP_UNREACH_HOST_PROHIB: case ICMP_UNREACH_FILTER_PROHIB: code = PRC_UNREACH_ADMIN_PROHIB; break; default: goto badcode; } goto deliver; case ICMP_TIMXCEED: if (code > 1) goto badcode; code += PRC_TIMXCEED_INTRANS; goto deliver; case ICMP_PARAMPROB: if (code > 1) goto badcode; code = PRC_PARAMPROB; deliver: /* * Problem with datagram; advise higher level routines.
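 *
 * (For example, an ICMP_UNREACH_PORT quoting a UDP datagram was
 * mapped to PRC_UNREACH_PORT above and is delivered to udp_ctlinput()
 * through the pr_ctlinput hook resolved below.)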
*/ if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { ICMPSTAT_INC(icps_badlen); goto freeit; } /* Discard ICMP's in response to multicast packets */ if (IN_MULTICAST(ntohl(icp->icmp_ip.ip_dst.s_addr))) goto badcode; #ifdef ICMPPRINTFS if (icmpprintfs) printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; /* * XXX if the packet contains [IPv4 AH TCP], we can't make a * notification to TCP layer. */ i = sizeof(struct ip) + min(icmplen, ICMP_ADVLENPREF(icp)); ip_stripoptions(m); if (m->m_len < i && (m = m_pullup(m, i)) == NULL) { /* This should actually not happen */ ICMPSTAT_INC(icps_tooshort); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); icp = (struct icmp *)(ip + 1); /* * The upper layer handler can rely on: * - The outer IP header has no options. * - The outer IP header, the ICMP header, the inner IP header, * and the first n bytes of the inner payload are contiguous. * n is at least 8, but might be larger based on * ICMP_ADVLENPREF. See its definition in ip_icmp.h. */ ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput; if (ctlfunc) (*ctlfunc)(code, (struct sockaddr *)&icmpsrc, (void *)&icp->icmp_ip); break; badcode: ICMPSTAT_INC(icps_badcode); break; case ICMP_ECHO: if (!V_icmpbmcastecho && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { ICMPSTAT_INC(icps_bmcastecho); break; } - icp->icmp_type = ICMP_ECHOREPLY; if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0) goto freeit; - else - goto reflect; + icp->icmp_type = ICMP_ECHOREPLY; + goto reflect; case ICMP_TSTAMP: if (V_icmptstamprepl == 0) break; if (!V_icmpbmcastecho && (m->m_flags & (M_MCAST | M_BCAST)) != 0) { ICMPSTAT_INC(icps_bmcasttstamp); break; } if (icmplen < ICMP_TSLEN) { ICMPSTAT_INC(icps_badlen); break; } + if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) + goto freeit; icp->icmp_type = ICMP_TSTAMPREPLY; icp->icmp_rtime = iptime(); icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ - if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) - goto freeit; - else - goto reflect; + goto reflect; case ICMP_MASKREQ: if (V_icmpmaskrepl == 0) break; /* * We are not able to respond with all ones broadcast * unless we receive it over a point-to-point interface. 
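 *
 * Hence the switch below: a request addressed to 255.255.255.255 or
 * 0.0.0.0 is matched against its *source* address to find a suitable
 * interface, and a zero ip_src on the way out is replaced with that
 * interface's broadcast (or point-to-point destination) address.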
*/ if (icmplen < ICMP_MASKLEN) break; switch (ip->ip_dst.s_addr) { case INADDR_BROADCAST: case INADDR_ANY: icmpdst.sin_addr = ip->ip_src; break; default: icmpdst.sin_addr = ip->ip_dst; } ia = (struct in_ifaddr *)ifaof_ifpforaddr( (struct sockaddr *)&icmpdst, m->m_pkthdr.rcvif); if (ia == NULL) break; if (ia->ia_ifp == NULL) { ifa_free(&ia->ia_ifa); break; } icp->icmp_type = ICMP_MASKREPLY; if (V_icmpmaskfake == 0) icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr; else icp->icmp_mask = V_icmpmaskfake; if (ip->ip_src.s_addr == 0) { if (ia->ia_ifp->if_flags & IFF_BROADCAST) ip->ip_src = satosin(&ia->ia_broadaddr)->sin_addr; else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT) ip->ip_src = satosin(&ia->ia_dstaddr)->sin_addr; } ifa_free(&ia->ia_ifa); reflect: ICMPSTAT_INC(icps_reflect); ICMPSTAT_INC(icps_outhist[icp->icmp_type]); icmp_reflect(m); return (IPPROTO_DONE); case ICMP_REDIRECT: if (V_log_redirect) { u_long src, dst, gw; src = ntohl(ip->ip_src.s_addr); dst = ntohl(icp->icmp_ip.ip_dst.s_addr); gw = ntohl(icp->icmp_gwaddr.s_addr); printf("icmp redirect from %d.%d.%d.%d: " "%d.%d.%d.%d => %d.%d.%d.%d\n", (int)(src >> 24), (int)((src >> 16) & 0xff), (int)((src >> 8) & 0xff), (int)(src & 0xff), (int)(dst >> 24), (int)((dst >> 16) & 0xff), (int)((dst >> 8) & 0xff), (int)(dst & 0xff), (int)(gw >> 24), (int)((gw >> 16) & 0xff), (int)((gw >> 8) & 0xff), (int)(gw & 0xff)); } /* * RFC1812 says we must ignore ICMP redirects if we * are acting as router. */ if (V_drop_redirect || V_ipforwarding) break; if (code > 3) goto badcode; if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) || icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) { ICMPSTAT_INC(icps_badlen); break; } /* * Short circuit routing redirects to force * immediate change in the kernel's routing * tables. The message is also handed to anyone * listening on a raw socket (e.g. the routing * daemon for use in updating its tables). */ icmpgw.sin_addr = ip->ip_src; icmpdst.sin_addr = icp->icmp_gwaddr; #ifdef ICMPPRINTFS if (icmpprintfs) { char dstbuf[INET_ADDRSTRLEN]; char gwbuf[INET_ADDRSTRLEN]; printf("redirect dst %s to %s\n", inet_ntoa_r(icp->icmp_ip.ip_dst, dstbuf), inet_ntoa_r(icp->icmp_gwaddr, gwbuf)); } #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) { in_rtredirect((struct sockaddr *)&icmpsrc, (struct sockaddr *)&icmpdst, (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST, (struct sockaddr *)&icmpgw, fibnum); } pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc); break; /* * No kernel processing for the following; * just fall through to send to raw listener. 
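 *
 * A userland consumer observes these via a raw socket; a minimal
 * sketch (privileged, error handling omitted):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP);
 *	char buf[1500];
 *	ssize_t n = recv(s, buf, sizeof(buf), 0);
 *
 * buf then holds the complete IP header followed by the ICMP message,
 * which is exactly what rip_input() queues on such sockets.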
*/ case ICMP_ECHOREPLY: case ICMP_ROUTERADVERT: case ICMP_ROUTERSOLICIT: case ICMP_TSTAMPREPLY: case ICMP_IREQREPLY: case ICMP_MASKREPLY: case ICMP_SOURCEQUENCH: default: break; } raw: *mp = m; rip_input(mp, offp, proto); return (IPPROTO_DONE); freeit: m_freem(m); return (IPPROTO_DONE); } /* * Reflect the ip packet back to the source */ static void icmp_reflect(struct mbuf *m) { struct rm_priotracker in_ifa_tracker; struct ip *ip = mtod(m, struct ip *); struct ifaddr *ifa; struct ifnet *ifp; struct in_ifaddr *ia; struct in_addr t; struct nhop4_extended nh_ext; struct mbuf *opts = NULL; int optlen = (ip->ip_hl << 2) - sizeof(struct ip); if (IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || IN_EXPERIMENTAL(ntohl(ip->ip_src.s_addr)) || IN_ZERONET(ntohl(ip->ip_src.s_addr)) ) { m_freem(m); /* Bad return address */ ICMPSTAT_INC(icps_badaddr); goto done; /* Ip_output() will check for broadcast */ } t = ip->ip_dst; ip->ip_dst = ip->ip_src; /* * Source selection for ICMP replies: * * If the incoming packet was addressed directly to one of our * own addresses, use dst as the src for the reply. */ IN_IFADDR_RLOCK(&in_ifa_tracker); LIST_FOREACH(ia, INADDR_HASH(t.s_addr), ia_hash) { if (t.s_addr == IA_SIN(ia)->sin_addr.s_addr) { t = IA_SIN(ia)->sin_addr; IN_IFADDR_RUNLOCK(&in_ifa_tracker); goto match; } } IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * If the incoming packet was addressed to one of our broadcast * addresses, use the first non-broadcast address which corresponds * to the incoming interface. */ ifp = m->m_pkthdr.rcvif; if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == t.s_addr) { t = IA_SIN(ia)->sin_addr; IF_ADDR_RUNLOCK(ifp); goto match; } } IF_ADDR_RUNLOCK(ifp); } /* * If the packet was transiting through us, use the address of * the interface the packet came through in. If that interface * doesn't have a suitable IP address, the normal selection * criteria apply. */ if (V_icmp_rfi && ifp != NULL) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); t = IA_SIN(ia)->sin_addr; IF_ADDR_RUNLOCK(ifp); goto match; } IF_ADDR_RUNLOCK(ifp); } /* * If the incoming packet was not addressed directly to us, use * designated interface for icmp replies specified by sysctl * net.inet.icmp.reply_src (default not set). Otherwise continue * with normal source selection. */ if (V_reply_src[0] != '\0' && (ifp = ifunit(V_reply_src))) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); t = IA_SIN(ia)->sin_addr; IF_ADDR_RUNLOCK(ifp); goto match; } IF_ADDR_RUNLOCK(ifp); } /* * If the packet was transiting through us, use the address of * the interface that is the closest to the packet source. * When we don't have a route back to the packet source, stop here * and drop the packet. */ if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh_ext) != 0) { m_freem(m); ICMPSTAT_INC(icps_noroute); goto done; } t = nh_ext.nh_src; match: #ifdef MAC mac_netinet_icmp_replyinplace(m); #endif ip->ip_src = t; ip->ip_ttl = V_ip_defttl; if (optlen > 0) { u_char *cp; int opt, cnt; u_int len; /* * Retrieve any source routing from the incoming packet; * add on any record-route or timestamp options. 
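 *
 * IP options are type-length-value encoded: cp[IPOPT_OPTVAL] holds the
 * type and cp[IPOPT_OLEN] the total option length, except that EOL (0)
 * and NOP (1) are single bytes.  For example, a record-route option
 * with room for three hops occupies 15 bytes: type (IPOPT_RR = 7),
 * length (15), pointer, then 3 * 4 address bytes; that is the layout
 * the copy loop below walks over.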
*/ cp = (u_char *) (ip + 1); if ((opts = ip_srcroute(m)) == NULL && (opts = m_gethdr(M_NOWAIT, MT_DATA))) { opts->m_len = sizeof(struct in_addr); mtod(opts, struct in_addr *)->s_addr = 0; } if (opts) { #ifdef ICMPPRINTFS if (icmpprintfs) printf("icmp_reflect optlen %d rt %d => ", optlen, opts->m_len); #endif for (cnt = optlen; cnt > 0; cnt -= len, cp += len) { opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) len = 1; else { if (cnt < IPOPT_OLEN + sizeof(*cp)) break; len = cp[IPOPT_OLEN]; if (len < IPOPT_OLEN + sizeof(*cp) || len > cnt) break; } /* * Should check for overflow, but it "can't happen" */ if (opt == IPOPT_RR || opt == IPOPT_TS || opt == IPOPT_SECURITY) { bcopy((caddr_t)cp, mtod(opts, caddr_t) + opts->m_len, len); opts->m_len += len; } } /* Terminate & pad, if necessary */ cnt = opts->m_len % 4; if (cnt) { for (; cnt < 4; cnt++) { *(mtod(opts, caddr_t) + opts->m_len) = IPOPT_EOL; opts->m_len++; } } #ifdef ICMPPRINTFS if (icmpprintfs) printf("%d\n", opts->m_len); #endif } ip_stripoptions(m); } m_tag_delete_nonpersistent(m); m->m_flags &= ~(M_BCAST|M_MCAST); icmp_send(m, opts); done: if (opts) (void)m_free(opts); } /* * Send an icmp packet back to the ip level, * after supplying a checksum. */ static void icmp_send(struct mbuf *m, struct mbuf *opts) { struct ip *ip = mtod(m, struct ip *); int hlen; struct icmp *icp; hlen = ip->ip_hl << 2; m->m_data += hlen; m->m_len -= hlen; icp = mtod(m, struct icmp *); icp->icmp_cksum = 0; icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen); m->m_data -= hlen; m->m_len += hlen; m->m_pkthdr.rcvif = (struct ifnet *)0; #ifdef ICMPPRINTFS if (icmpprintfs) { char dstbuf[INET_ADDRSTRLEN]; char srcbuf[INET_ADDRSTRLEN]; printf("icmp_send dst %s src %s\n", inet_ntoa_r(ip->ip_dst, dstbuf), inet_ntoa_r(ip->ip_src, srcbuf)); } #endif (void) ip_output(m, opts, NULL, 0, NULL, NULL); } /* * Return milliseconds since 00:00 UTC in network format. */ uint32_t iptime(void) { struct timeval atv; u_long t; getmicrotime(&atv); t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000; return (htonl(t)); } /* * Return the next larger or smaller MTU plateau (table from RFC 1191) * given current value MTU. If DIR is less than zero, a larger plateau * is returned; otherwise, a smaller value is returned. */ int ip_next_mtu(int mtu, int dir) { static int mtutab[] = { 65535, 32000, 17914, 8166, 4352, 2002, 1492, 1280, 1006, 508, 296, 68, 0 }; int i, size; size = (sizeof mtutab) / (sizeof mtutab[0]); if (dir >= 0) { for (i = 0; i < size; i++) if (mtu > mtutab[i]) return mtutab[i]; } else { for (i = size - 1; i >= 0; i--) if (mtu < mtutab[i]) return mtutab[i]; if (mtu == mtutab[0]) return mtutab[0]; } return 0; } #endif /* INET */ /* * badport_bandlim() - check for ICMP bandwidth limit * * Return 0 if it is ok to send an ICMP error response, -1 if we have * hit our bandwidth limit and it is not ok. * * If icmplim is <= 0, the feature is disabled and 0 is returned. * * For now we separate the TCP and UDP subsystems w/ different 'which' * values. We may eventually remove this separation (and simplify the * code further). * * Note that the printing of the error message is delayed so we can * properly print the icmp error rate that the system was trying to do * (i.e. 22000/100 pps, etc...). This can cause long delays in printing * the 'final' error, but it doesn't make sense to solve the printing * delay with more complex code. 
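 *
 * The counter_ratecheck() call below effectively implements a fixed
 * one-second window.  A freestanding sketch of the same idea
 * (hypothetical code, not the kernel's counter(9)-based version):
 *
 *	static long cnt;
 *	static int win;
 *
 *	int
 *	allow(int now_sec, long lim)
 *	{
 *		if (now_sec != win) { win = now_sec; cnt = 0; }
 *		return (++cnt <= lim);
 *	}
 *
 * allow() returns nonzero while the current second is under the limit,
 * which mirrors badport_bandlim()'s 0 / -1 contract below.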
*/ struct icmp_rate { const char *descr; struct counter_rate cr; }; static VNET_DEFINE(struct icmp_rate, icmp_rates[BANDLIM_MAX]) = { { "icmp unreach response" }, { "icmp ping response" }, { "icmp tstamp response" }, { "closed port RST response" }, { "open port RST response" }, { "icmp6 unreach response" }, { "sctp ootb response" } }; #define V_icmp_rates VNET(icmp_rates) static void icmp_bandlimit_init(void) { for (int i = 0; i < BANDLIM_MAX; i++) { V_icmp_rates[i].cr.cr_rate = counter_u64_alloc(M_WAITOK); V_icmp_rates[i].cr.cr_ticks = ticks; } } VNET_SYSINIT(icmp_bandlimit, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, icmp_bandlimit_init, NULL); static void icmp_bandlimit_uninit(void) { for (int i = 0; i < BANDLIM_MAX; i++) counter_u64_free(V_icmp_rates[i].cr.cr_rate); } VNET_SYSUNINIT(icmp_bandlimit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, icmp_bandlimit_uninit, NULL); int badport_bandlim(int which) { int64_t pps; if (V_icmplim == 0 || which == BANDLIM_UNLIMITED) return (0); KASSERT(which >= 0 && which < BANDLIM_MAX, ("%s: which %d", __func__, which)); pps = counter_ratecheck(&V_icmp_rates[which].cr, V_icmplim); if (pps == -1) return (-1); if (pps > 0 && V_icmplim_output) log(LOG_NOTICE, "Limiting %s from %jd to %d packets/sec\n", V_icmp_rates[which].descr, (intmax_t )pps, V_icmplim); return (0); } Index: projects/clang500-import/sys/netinet6/icmp6.c =================================================================== --- projects/clang500-import/sys/netinet6/icmp6.c (revision 319250) +++ projects/clang500-import/sys/netinet6/icmp6.c (revision 319251) @@ -1,2813 +1,2814 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #define MBUF_PRIVATE /* XXXRW: Optimisation tries to avoid M_EXT mbufs */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct domain inet6domain; VNET_PCPUSTAT_DEFINE(struct icmp6stat, icmp6stat); VNET_PCPUSTAT_SYSINIT(icmp6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(icmp6stat); #endif /* VIMAGE */ VNET_DECLARE(struct inpcbinfo, ripcbinfo); VNET_DECLARE(struct inpcbhead, ripcb); VNET_DECLARE(int, icmp6errppslim); static VNET_DEFINE(int, icmp6errpps_count) = 0; static VNET_DEFINE(struct timeval, icmp6errppslim_last); VNET_DECLARE(int, icmp6_nodeinfo); #define V_ripcbinfo VNET(ripcbinfo) #define V_ripcb VNET(ripcb) #define V_icmp6errppslim VNET(icmp6errppslim) #define V_icmp6errpps_count VNET(icmp6errpps_count) #define V_icmp6errppslim_last VNET(icmp6errppslim_last) #define V_icmp6_nodeinfo VNET(icmp6_nodeinfo) static void icmp6_errcount(int, int); static int icmp6_rip6_input(struct mbuf **, int); static int icmp6_ratelimit(const struct in6_addr *, const int, const int); static const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *, struct in6_addr *); static struct mbuf *ni6_input(struct mbuf *, int); static struct mbuf *ni6_nametodns(const char *, int, int); static int ni6_dnsmatch(const char *, int, const char *, int); static int ni6_addrs(struct icmp6_nodeinfo *, struct mbuf *, struct ifnet **, struct in6_addr *); static int ni6_store_addrs(struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int); static int icmp6_notify_error(struct mbuf **, int, int, int); /* * Kernel module interface for updating icmp6stat. The argument is an index * into icmp6stat treated as an array of u_quad_t.
While this encodes the * general layout of icmp6stat into the caller, it doesn't encode its * location, so that future changes to add, for example, per-CPU stats * support won't cause binary compatibility problems for kernel modules. */ void kmod_icmp6stat_inc(int statnum) { counter_u64_add(VNET(icmp6stat)[statnum], 1); } static void icmp6_errcount(int type, int code) { switch (type) { case ICMP6_DST_UNREACH: switch (code) { case ICMP6_DST_UNREACH_NOROUTE: ICMP6STAT_INC(icp6s_odst_unreach_noroute); return; case ICMP6_DST_UNREACH_ADMIN: ICMP6STAT_INC(icp6s_odst_unreach_admin); return; case ICMP6_DST_UNREACH_BEYONDSCOPE: ICMP6STAT_INC(icp6s_odst_unreach_beyondscope); return; case ICMP6_DST_UNREACH_ADDR: ICMP6STAT_INC(icp6s_odst_unreach_addr); return; case ICMP6_DST_UNREACH_NOPORT: ICMP6STAT_INC(icp6s_odst_unreach_noport); return; } break; case ICMP6_PACKET_TOO_BIG: ICMP6STAT_INC(icp6s_opacket_too_big); return; case ICMP6_TIME_EXCEEDED: switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: ICMP6STAT_INC(icp6s_otime_exceed_transit); return; case ICMP6_TIME_EXCEED_REASSEMBLY: ICMP6STAT_INC(icp6s_otime_exceed_reassembly); return; } break; case ICMP6_PARAM_PROB: switch (code) { case ICMP6_PARAMPROB_HEADER: ICMP6STAT_INC(icp6s_oparamprob_header); return; case ICMP6_PARAMPROB_NEXTHEADER: ICMP6STAT_INC(icp6s_oparamprob_nextheader); return; case ICMP6_PARAMPROB_OPTION: ICMP6STAT_INC(icp6s_oparamprob_option); return; } break; case ND_REDIRECT: ICMP6STAT_INC(icp6s_oredirect); return; } ICMP6STAT_INC(icp6s_ounknown); } /* * A wrapper function for icmp6_error() necessary when the erroneous packet * may not contain enough scope zone information. */ void icmp6_error2(struct mbuf *m, int type, int code, int param, struct ifnet *ifp) { struct ip6_hdr *ip6; if (ifp == NULL) return; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif ip6 = mtod(m, struct ip6_hdr *); if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0) return; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) return; icmp6_error(m, type, code, param); } /* * Generate an error packet of type error in response to bad IP6 packet. */ void icmp6_error(struct mbuf *m, int type, int code, int param) { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; u_int preplen; int off; int nxt; ICMP6STAT_INC(icp6s_error); /* count per-type-code statistics */ icmp6_errcount(type, code); #ifdef M_DECRYPTED /*not openbsd*/ if (m->m_flags & M_DECRYPTED) { ICMP6STAT_INC(icp6s_canterror); goto freeit; } #endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif oip6 = mtod(m, struct ip6_hdr *); /* * If the destination address of the erroneous packet is a multicast * address, or the packet was sent using link-layer multicast, * we should basically suppress sending an error (RFC 2463, Section * 2.4). * We have two exceptions (the item e.2 in that section): * - the Packet Too Big message can be sent for path MTU discovery. * - the Parameter Problem Message that can be allowed an icmp6 error * in the option type field. This check has been done in * ip6_unknown_opt(), so we can just check the type and code. 
*/ if ((m->m_flags & (M_BCAST|M_MCAST) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && (type != ICMP6_PACKET_TOO_BIG && (type != ICMP6_PARAM_PROB || code != ICMP6_PARAMPROB_OPTION))) goto freeit; /* * RFC 2463, 2.4 (e.5): source address check. * XXX: the case of anycast source? */ if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) goto freeit; /* * If we are about to send ICMPv6 against ICMPv6 error/redirect, * don't do it. */ nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off >= 0 && nxt == IPPROTO_ICMPV6) { struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, sizeof(*icp)); if (icp == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { /* * ICMPv6 error * Special case: for redirect (which is * informational) we must not send icmp6 error. */ ICMP6STAT_INC(icp6s_canterror); goto freeit; } else { /* ICMPv6 informational - send the error */ } } else { /* non-ICMPv6 - send the error */ } oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ /* Finally, do rate limitation check. */ if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { ICMP6STAT_INC(icp6s_toofreq); goto freeit; } /* * OK, ICMP6 can be generated. */ if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); M_PREPEND(m, preplen, M_NOWAIT); /* FIB is also copied over. */ if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; } nip6 = mtod(m, struct ip6_hdr *); nip6->ip6_src = oip6->ip6_src; nip6->ip6_dst = oip6->ip6_dst; in6_clearscope(&oip6->ip6_src); in6_clearscope(&oip6->ip6_dst); icmp6 = (struct icmp6_hdr *)(nip6 + 1); icmp6->icmp6_type = type; icmp6->icmp6_code = code; icmp6->icmp6_pptr = htonl((u_int32_t)param); /* * icmp6_reflect() is designed to be in the input path. * icmp6_error() can be called from both input and output path, * and if we are in output path rcvif could contain bogus value. * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). */ m->m_pkthdr.rcvif = NULL; ICMP6STAT_INC(icp6s_outhist[type]); icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ return; freeit: /* * If we can't tell whether or not we can generate ICMP6, free it. */ m_freem(m); } /* * Process a received ICMP6 message. */ int icmp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp, *n; struct ifnet *ifp; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; int icmp6len = m->m_pkthdr.len - *offp; int code, sum, noff; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; int ip6len, error; ifp = m->m_pkthdr.rcvif; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ #endif /* * Locate icmp6 structure in mbuf, and check * that not corrupted and of at least minimum length */ ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); if (icmp6len < sizeof(struct icmp6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } /* * Check multicast group membership. * Note: SSM filters are not applied for ICMPv6 traffic. 
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct in6_multi *inm; inm = in6m_lookup(ifp, &ip6->ip6_dst); if (inm == NULL) { IP6STAT_INC(ip6s_notmember); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); goto freeit; } } /* * calculate the checksum */ #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return IPPROTO_DONE; } #endif code = icmp6->icmp6_code; if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", icmp6->icmp6_type, sum, ip6_sprintf(ip6bufs, &ip6->ip6_src))); ICMP6STAT_INC(icp6s_checksum); goto freeit; } ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]); icmp6_ifstat_inc(ifp, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(ifp, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(ifp, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: case ICMP6_DST_UNREACH_ADDR: /* PRC_HOSTDEAD is a DOS */ code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(ifp, ifs6_in_adminprohib); code = PRC_UNREACH_ADMIN_PROHIB; break; case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; break; case ICMP6_DST_UNREACH_NOPORT: code = PRC_UNREACH_PORT; break; default: goto badcode; } goto deliver; break; case ICMP6_PACKET_TOO_BIG: icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig); /* validation is made in icmp6_mtudisc_update */ code = PRC_MSGSIZE; /* * Updating the path MTU will be done after examining * intermediate extension headers. */ goto deliver; break; case ICMP6_TIME_EXCEEDED: icmp6_ifstat_inc(ifp, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: code = PRC_TIMXCEED_INTRANS; break; case ICMP6_TIME_EXCEED_REASSEMBLY: code = PRC_TIMXCEED_REASS; break; default: goto badcode; } goto deliver; break; case ICMP6_PARAM_PROB: icmp6_ifstat_inc(ifp, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; break; case ICMP6_PARAMPROB_HEADER: case ICMP6_PARAMPROB_OPTION: code = PRC_PARAMPROB; break; default: goto badcode; } goto deliver; break; case ICMP6_ECHO_REQUEST: icmp6_ifstat_inc(ifp, ifs6_in_echo); if (code != 0) goto badcode; if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) { /* Give up remote */ break; } if (!M_WRITABLE(n) || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; int n0len; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) <= MHLEN); n = m_gethdr(M_NOWAIT, n0->m_type); if (n == NULL) { /* Give up remote */ m_freem(n0); break; } m_move_pkthdr(n, n0); /* FIB copied. */ n0len = n0->m_pkthdr.len; /* save for use below */ /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); noff = sizeof(struct ip6_hdr); /* new mbuf contains only ipv6+icmpv6 headers */ n->m_len = noff + sizeof(struct icmp6_hdr); /* * Adjust mbuf. ip6_plen will be adjusted in * ip6_output(). 
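 *
 * Resulting chain (sketch): n carries only the two headers
 * (m_len = 40 + 8), n0 keeps the echo payload with its first
 * off + 8 bytes trimmed off, and the packet length is recomputed
 * as n0len + (noff - off) before n0 is linked behind n.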
*/ m_adj(n0, off + sizeof(struct icmp6_hdr)); /* recalculate complete packet size */ n->m_pkthdr.len = n0len + (noff - off); n->m_next = n0; } else { nip6 = mtod(n, struct ip6_hdr *); IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off, sizeof(*nicmp6)); noff = off; } - nicmp6->icmp6_type = ICMP6_ECHO_REPLY; - nicmp6->icmp6_code = 0; if (n) { + nicmp6->icmp6_type = ICMP6_ECHO_REPLY; + nicmp6->icmp6_code = 0; ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]); icmp6_reflect(n, noff); } break; case ICMP6_ECHO_REPLY: icmp6_ifstat_inc(ifp, ifs6_in_echoreply); if (code != 0) goto badcode; break; case MLD_LISTENER_QUERY: case MLD_LISTENER_REPORT: case MLD_LISTENER_DONE: case MLDV2_LISTENER_REPORT: /* * Drop MLD traffic which is not link-local, has a hop limit * of greater than 1 hop, or which does not have the * IPv6 HBH Router Alert option. * As IPv6 HBH options are stripped in ip6_input() we must * check an mbuf header flag. * XXX Should we also sanity check that these messages * were directed to a link-local multicast prefix? */ if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0) goto freeit; if (mld_input(m, off, icmp6len) != 0) return (IPPROTO_DONE); /* m stays. */ break; case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; if (!V_icmp6_nodeinfo) break; if (icmp6len == sizeof(struct icmp6_hdr) + 4) mode = WRU; else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) mode = FQDN; else goto badlen; if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), IPPROTO_DONE); #endif n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n) n = ni6_input(n, off); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { struct prison *pr; u_char *p; int maxhlen, hlen; /* * XXX: this combination of flags is pointless, * but should we keep this for compatibility? */ if ((V_icmp6_nodeinfo & (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK)) != (ICMP6_NODEINFO_FQDNOK | ICMP6_NODEINFO_TMPADDROK)) break; if (code != 0) goto badcode; CTASSERT(sizeof(*nip6) + sizeof(*nicmp6) + 4 <= MHLEN); n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { /* Give up remote */ break; } if (!m_dup_pkthdr(n, m, M_NOWAIT)) { /* * Previous code did a blind M_COPY_PKTHDR * and said "just for rcvif". If true, then * we could tolerate the dup failing (due to * the deep copy of the tag chain). For now * be conservative and just fail. */ m_free(n); n = NULL; + break; } maxhlen = M_TRAILINGSPACE(n) - (sizeof(*nip6) + sizeof(*nicmp6) + 4); pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); hlen = strlen(pr->pr_hostname); if (maxhlen > hlen) maxhlen = hlen; /* * Copy IPv6 and ICMPv6 only. 
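 *
 * Reply layout (sketch): [ip6, 40 bytes][icmp6, 8 bytes][4 zeroed
 * bytes, the "meaningless TTL"][hostname, at most maxhlen bytes];
 * n's packet length is set to exactly that sum once the name has
 * been copied in.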
*/ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); /* meaningless TTL */ bcopy(pr->pr_hostname, p + 4, maxhlen); mtx_unlock(&pr->pr_mtx); noff = sizeof(struct ip6_hdr); n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + 4 + maxhlen; nicmp6->icmp6_type = ICMP6_WRUREPLY; nicmp6->icmp6_code = 0; } if (n) { ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_WRUREPLY]); icmp6_reflect(n, noff); } break; } case ICMP6_WRUREPLY: if (code != 0) goto badcode; break; case ND_ROUTER_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; if (send_sendso_input_hook != NULL) { IP6_EXTHDR_CHECK(m, off, icmp6len, IPPROTO_DONE); error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) { m = NULL; goto freeit; } } n = m_copym(m, 0, M_COPYALL, M_NOWAIT); nd6_rs_input(m, off, icmp6len); m = n; if (m == NULL) goto freeit; break; case ND_ROUTER_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) { m = NULL; goto freeit; } } n = m_copym(m, 0, M_COPYALL, M_NOWAIT); nd6_ra_input(m, off, icmp6len); m = n; if (m == NULL) goto freeit; break; case ND_NEIGHBOR_SOLICIT: icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) { m = NULL; goto freeit; } } n = m_copym(m, 0, M_COPYALL, M_NOWAIT); nd6_ns_input(m, off, icmp6len); m = n; if (m == NULL) goto freeit; break; case ND_NEIGHBOR_ADVERT: icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) { m = NULL; goto freeit; } } n = m_copym(m, 0, M_COPYALL, M_NOWAIT); nd6_na_input(m, off, icmp6len); m = n; if (m == NULL) goto freeit; break; case ND_REDIRECT: icmp6_ifstat_inc(ifp, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; if (send_sendso_input_hook != NULL) { error = send_sendso_input_hook(m, ifp, SND_IN, ip6len); if (error == 0) { m = NULL; goto freeit; } } n = m_copym(m, 0, M_COPYALL, M_NOWAIT); icmp6_redirect_input(m, off); m = n; if (m == NULL) goto freeit; break; case ICMP6_ROUTER_RENUMBERING: if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && code != ICMP6_ROUTER_RENUMBERING_RESULT) goto badcode; if (icmp6len < sizeof(struct icmp6_router_renum)) goto badlen; break; default: nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), ifp ? ifp->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; /* deliver */ } else { /* ICMPv6 informational: MUST not deliver */ break; } deliver: if (icmp6_notify_error(&m, off, icmp6len, code) != 0) { /* In this case, m should've been freed. 
*/ return (IPPROTO_DONE); } break; badcode: ICMP6STAT_INC(icp6s_badcode); break; badlen: ICMP6STAT_INC(icp6s_badlen); break; } /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, *offp); return IPPROTO_DONE; freeit: m_freem(m); return IPPROTO_DONE; } static int icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) { struct mbuf *m = *mp; struct icmp6_hdr *icmp6; struct ip6_hdr *eip6; u_int32_t notifymtu; struct sockaddr_in6 icmp6src, icmp6dst; if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { ICMP6STAT_INC(icp6s_tooshort); goto freeit; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); /* Detect the upper level protocol */ { void (*ctlfunc)(int, struct sockaddr *, void *); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); struct ip6ctlparam ip6cp; struct in6_addr *finaldst = NULL; int icmp6type = icmp6->icmp6_type; struct ip6_frag *fh; struct ip6_rthdr *rth; struct ip6_rthdr0 *rth0; int rthlen; while (1) { /* XXX: should avoid infinite loop explicitly? */ struct ip6_ext *eh; switch (nxt) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: case IPPROTO_AH: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(eh, struct ip6_ext *, m, eoff, sizeof(*eh)); if (eh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif if (nxt == IPPROTO_AH) eoff += (eh->ip6e_len + 2) << 2; else eoff += (eh->ip6e_len + 1) << 3; nxt = eh->ip6e_nxt; break; case IPPROTO_ROUTING: /* * When the erroneous packet contains a * routing header, we should examine the * header to determine the final destination. * Otherwise, we can't properly update * information that depends on the final * destination (e.g. path MTU). */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); rth = (struct ip6_rthdr *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, eoff, sizeof(*rth)); if (rth == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif rthlen = (rth->ip6r_len + 1) << 3; /* * XXX: currently there is no * officially defined type other * than type-0. * Note that if the segment left field * is 0, all intermediate hops must * have been passed. 
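 *
 * Worked example: a type-0 routing header with ip6r0_len = 4 carries
 * two addresses (ip6r0_len / 2).  With a nonzero segments-left field
 * the packet's true final destination is the last slot,
 * addrs[hops - 1], which is what the code below extracts.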
*/ if (rth->ip6r_segleft && rth->ip6r_type == IPV6_RTHDR_TYPE_0) { int hops; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); rth0 = (struct ip6_rthdr0 *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, struct ip6_rthdr0 *, m, eoff, rthlen); if (rth0 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* just ignore a bogus header */ if ((rth0->ip6r0_len % 2) == 0 && (hops = rth0->ip6r0_len/2)) finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); } eoff += rthlen; nxt = rth->ip6r_nxt; break; case IPPROTO_FRAGMENT: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(fh, struct ip6_frag *, m, eoff, sizeof(*fh)); if (fh == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * Data after a fragment header is meaningless * unless it is the first fragment, but * we'll go to the notify label for path MTU * discovery. */ if (fh->ip6f_offlg & IP6F_OFF_MASK) goto notify; eoff += sizeof(struct ip6_frag); nxt = fh->ip6f_nxt; break; default: /* * This case includes ESP and the No Next * Header. In such cases going to the notify * label does not have any meaning * (i.e. ctlfunc will be NULL), but we go * anyway since we might have to update * path MTU information. */ goto notify; } } notify: #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { ICMP6STAT_INC(icp6s_tooshort); return (-1); } #endif /* * retrieve parameters from the inner IPv6 header, and convert * them into sockaddr structures. * XXX: there is no guarantee that the source or destination * addresses of the inner packet are in the same scope as * the addresses of the icmp packet. But there is no other * way to determine the zone. */ eip6 = (struct ip6_hdr *)(icmp6 + 1); bzero(&icmp6dst, sizeof(icmp6dst)); icmp6dst.sin6_len = sizeof(struct sockaddr_in6); icmp6dst.sin6_family = AF_INET6; if (finaldst == NULL) icmp6dst.sin6_addr = eip6->ip6_dst; else icmp6dst.sin6_addr = *finaldst; if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; bzero(&icmp6src, sizeof(icmp6src)); icmp6src.sin6_len = sizeof(struct sockaddr_in6); icmp6src.sin6_family = AF_INET6; icmp6src.sin6_addr = eip6->ip6_src; if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; icmp6src.sin6_flowinfo = (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); if (finaldst == NULL) finaldst = &eip6->ip6_dst; ip6cp.ip6c_m = m; ip6cp.ip6c_icmp6 = icmp6; ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); ip6cp.ip6c_off = eoff; ip6cp.ip6c_finaldst = finaldst; ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } ctlfunc = (void (*)(int, struct sockaddr *, void *)) (inet6sw[ip6_protox[nxt]].pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, &ip6cp); } } *mp = m; return (0); freeit: m_freem(m); return (-1); } void icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) { struct in6_addr *dst = ip6cp->ip6c_finaldst; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ u_int mtu = ntohl(icmp6->icmp6_mtu); struct in_conninfo inc; #if 0 /* * RFC2460 section 5, last paragraph. 
* even though minimum link MTU for IPv6 is IPV6_MMTU, * we may see ICMPv6 too big with mtu < IPV6_MMTU * due to packet translator in the middle. * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for * special handling. */ if (mtu < IPV6_MMTU) return; #endif /* * we reject ICMPv6 too big with abnormally small value. * XXX what is the good definition of "abnormally small"? */ if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8) return; if (!validated) return; /* * In case the suggested mtu is less than IPV6_MMTU, we * only need to remember that it was for above mentioned * "alwaysfrag" case. * Try to be as close to the spec as possible. */ if (mtu < IPV6_MMTU) mtu = IPV6_MMTU - 8; bzero(&inc, sizeof(inc)); inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = *dst; if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) return; if (mtu < tcp_maxmtu6(&inc, NULL)) { tcp_hc_updatemtu(&inc, mtu); ICMP6STAT_INC(icp6s_pmtuchg); } } /* * Process a Node Information Query packet, based on * draft-ietf-ipngwg-icmp-name-lookups-07. * * Spec incompatibilities: * - IPv6 Subject address handling * - IPv4 Subject address handling support missing * - Proxy reply (answer even if it's not for me) * - joins NI group address at in6_ifattach() time only, does not cope * with hostname changes by sethostname(3) */ static struct mbuf * ni6_input(struct mbuf *m, int off) { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; struct prison *pr; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); struct ni_reply_fqdn *fqdn; int addrs; /* for NI_QTYPE_NODEADDR */ struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ struct in6_addr in6_subj; /* subject address */ struct ip6_hdr *ip6; int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ char *subj = NULL; struct in6_ifaddr *ia6 = NULL; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); if (ni6 == NULL) { /* m is already reclaimed */ return (NULL); } #endif /* * Validate IPv6 source address. * The default configuration MUST be to refuse answering queries from * global-scope addresses according to RFC4602. * Notes: * - it's not very clear what "refuse" means; this implementation * simply drops it. * - it's not very easy to identify global-scope (unicast) addresses * since there are many prefixes for them. It should be safer * and in practice sufficient to check "all" but loopback and * link-local (note that site-local unicast was deprecated and * ULA is defined as global scope-wise) */ if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 && !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) goto bad; /* * Validate IPv6 destination address. * * The Responder must discard the Query without further processing * unless it is one of the Responder's unicast or anycast addresses, or * a link-local scope multicast address which the Responder has joined. * [RFC4602, Section 5.] 
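 *
 * The link-local multicast group in question is the NI group address
 * joined at in6_ifattach() time (see in6_nigroup()), derived from a
 * hash of the first label of the node's name, so a querier normally
 * has to know that name to reach us this way.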
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) goto bad; /* else it's a link-local multicast, fine */ } else { /* unicast or anycast */ ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia6 == NULL) goto bad; /* XXX impossible */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && !(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) { ifa_free(&ia6->ia_ifa); nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } ifa_free(&ia6->ia_ifa); } /* validate query Subject field. */ qtype = ntohs(ni6->ni_qtype); subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); switch (qtype) { case NI_QTYPE_NOOP: case NI_QTYPE_SUPTYPES: /* 07 draft */ if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) break; /* FALLTHROUGH */ case NI_QTYPE_FQDN: case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: #if ICMP6_NI_SUBJ_IPV6 != 0 case 0: #endif /* * backward compatibility - try to accept 03 draft * format, where no Subject is present. */ if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && subjlen == 0) { oldfqdn++; break; } #if ICMP6_NI_SUBJ_IPV6 != 0 if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) goto bad; #endif if (subjlen != sizeof(struct in6_addr)) goto bad; /* * Validate Subject address. * * Not sure what exactly "address belongs to the node" * means in the spec, is it just unicast, or what? * * At this moment we consider Subject address as * "belong to the node" if the Subject address equals * to the IPv6 destination address; validation for * IPv6 destination address should have done enough * check for us. * * We do not do proxy at this moment. */ /* m_pulldown instead of copy? */ m_copydata(m, off + sizeof(struct icmp6_nodeinfo), subjlen, (caddr_t)&in6_subj); if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL)) goto bad; subj = (char *)&in6_subj; if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj)) break; /* * XXX if we are to allow other cases, we should really * be careful about scope here. * basically, we should disallow queries toward IPv6 * destination X with subject Y, * if scope(X) > scope(Y). * if we allow scope(X) > scope(Y), it will result in * information leakage across scope boundary. */ goto bad; case ICMP6_NI_SUBJ_FQDN: /* * Validate Subject name with gethostname(3). * * The behavior may need some debate, since: * - we are not sure if the node has FQDN as * hostname (returned by gethostname(3)). * - the code does wildcard match for truncated names. * however, we are not sure if we want to perform * wildcard match, if gethostname(3) side has * truncated hostname. */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), 0); mtx_unlock(&pr->pr_mtx); if (!n || n->m_next || n->m_len == 0) goto bad; IP6_EXTHDR_GET(subj, char *, m, off + sizeof(struct icmp6_nodeinfo), subjlen); if (subj == NULL) goto bad; if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), n->m_len)) { goto bad; } m_freem(n); n = NULL; break; case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ default: goto bad; } break; } /* refuse based on configuration. XXX ICMP6_NI_REFUSED? 
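 *
 * For example, with net.inet6.icmp6.nodeinfo set to 1
 * (ICMP6_NODEINFO_FQDNOK only), an FQDN query is still answered
 * while a NODEADDR query is silently dropped right here.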
*/ switch (qtype) { case NI_QTYPE_FQDN: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0) goto bad; break; case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0) goto bad; break; } /* guess reply length */ switch (qtype) { case NI_QTYPE_NOOP: break; /* no reply data */ case NI_QTYPE_SUPTYPES: replylen += sizeof(u_int32_t); break; case NI_QTYPE_FQDN: /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ break; case NI_QTYPE_IPV4ADDR: /* unsupported - should respond with unknown Qtype? */ break; default: /* * XXX: We must return a reply with the ICMP6 code * `unknown Qtype' in this case. However we regard the case * as an FQDN query for backward compatibility. * Older versions set a random value to this field, * so it rarely varies in the defined qtypes. * But the mechanism is not reliable... * maybe we should obsolete older versions. */ qtype = NI_QTYPE_FQDN; /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); oldfqdn++; break; } /* Allocate an mbuf to reply. */ if (replylen > MCLBYTES) { /* * XXX: should we try to allocate more? But MCLBYTES * is probably much larger than IPV6_MMTU... */ goto bad; } if (replylen > MHLEN) n = m_getcl(M_NOWAIT, m->m_type, M_PKTHDR); else n = m_gethdr(M_NOWAIT, m->m_type); if (n == NULL) { m_freem(m); return (NULL); } m_move_pkthdr(n, m); /* just for recvif and FIB */ n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); /* qtype dependent procedure */ switch (qtype) { case NI_QTYPE_NOOP: nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = 0; break; case NI_QTYPE_SUPTYPES: { u_int32_t v; nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = htons(0x0000); /* raw bitmap */ /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ v = (u_int32_t)htonl(0x0000000f); bcopy(&v, nni6 + 1, sizeof(u_int32_t)); break; } case NI_QTYPE_FQDN: nni6->ni_code = ICMP6_NI_SUCCESS; fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); nni6->ni_flags = 0; /* XXX: meaningless TTL */ fqdn->ni_fqdn_ttl = 0; /* ditto. */ /* * XXX do we really have FQDN in hostname? */ pr = curthread->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); n->m_next = ni6_nametodns(pr->pr_hostname, strlen(pr->pr_hostname), oldfqdn); mtx_unlock(&pr->pr_mtx); if (n->m_next == NULL) goto bad; /* XXX we assume that n->m_next is not a chain */ if (n->m_next->m_next != NULL) goto bad; n->m_pkthdr.len += n->m_next->m_len; break; case NI_QTYPE_NODEADDR: { int lenlim, copied; nni6->ni_code = ICMP6_NI_SUCCESS; n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); lenlim = M_TRAILINGSPACE(n); copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); /* XXX: reset mbuf length */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo) + copied; break; } default: break; /* XXX impossible! 
*/ } nni6->ni_type = ICMP6_NI_REPLY; m_freem(m); return (n); bad: m_freem(m); if (n) m_freem(n); return (NULL); } /* * make a mbuf with DNS-encoded string. no compression support. * * XXX names with less than 2 dots (like "foo" or "foo.section") will be * treated as truncated name (two \0 at the end). this is a wild guess. * * old - return pascal string if non-zero */ static struct mbuf * ni6_nametodns(const char *name, int namelen, int old) { struct mbuf *m; char *cp, *ep; const char *p, *q; int i, len, nterm; if (old) len = namelen + 1; else len = MCLBYTES; /* Because MAXHOSTNAMELEN is usually 256, we use cluster mbuf. */ if (len > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) goto fail; if (old) { m->m_len = len; *mtod(m, char *) = namelen; bcopy(name, mtod(m, char *) + 1, namelen); return m; } else { m->m_len = 0; cp = mtod(m, char *); ep = mtod(m, char *) + M_TRAILINGSPACE(m); /* if not certain about my name, return empty buffer */ if (namelen == 0) return m; /* * guess if it looks like shortened hostname, or FQDN. * shortened hostname needs two trailing "\0". */ i = 0; for (p = name; p < name + namelen; p++) { if (*p && *p == '.') i++; } if (i < 2) nterm = 2; else nterm = 1; p = name; while (cp < ep && p < name + namelen) { i = 0; for (q = p; q < name + namelen && *q && *q != '.'; q++) i++; /* result does not fit into mbuf */ if (cp + i + 1 >= ep) goto fail; /* * DNS label length restriction, RFC1035 page 8. * "i == 0" case is included here to avoid returning * 0-length label on "foo..bar". */ if (i <= 0 || i >= 64) goto fail; *cp++ = i; bcopy(p, cp, i); cp += i; p = q; if (p < name + namelen && *p == '.') p++; } /* termination */ if (cp + nterm >= ep) goto fail; while (nterm-- > 0) *cp++ = '\0'; m->m_len = cp - mtod(m, char *); return m; } panic("should not reach here"); /* NOTREACHED */ fail: if (m) m_freem(m); return NULL; } /* * check if two DNS-encoded string matches. takes care of truncated * form (with \0\0 at the end). no compression support. * XXX upper/lowercase match (see RFC2065) */ static int ni6_dnsmatch(const char *a, int alen, const char *b, int blen) { const char *a0, *b0; int l; /* simplest case - need validation? */ if (alen == blen && bcmp(a, b, alen) == 0) return 1; a0 = a; b0 = b; /* termination is mandatory */ if (alen < 2 || blen < 2) return 0; if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') return 0; alen--; blen--; while (a - a0 < alen && b - b0 < blen) { if (a - a0 + 1 > alen || b - b0 + 1 > blen) return 0; if ((signed char)a[0] < 0 || (signed char)b[0] < 0) return 0; /* we don't support compression yet */ if (a[0] >= 64 || b[0] >= 64) return 0; /* truncated case */ if (a[0] == 0 && a - a0 == alen - 1) return 1; if (b[0] == 0 && b - b0 == blen - 1) return 1; if (a[0] == 0 || b[0] == 0) return 0; if (a[0] != b[0]) return 0; l = a[0]; if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) return 0; if (bcmp(a + 1, b + 1, l) != 0) return 0; a += 1 + l; b += 1 + l; } if (a - a0 == alen && b - b0 == blen) return 1; else return 0; } /* * calculate the number of addresses to be returned in the node info reply. */ static int ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, struct in6_addr *subj) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; int addrs = 0, addrsofif, iffound = 0; int niflags = ni6->ni_flags; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: if (subj == NULL) /* must be impossible... 
*/ return (0); break; default: /* * XXX: we only support IPv6 subject address for * this Qtype. */ return (0); } } IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { addrsofif = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr)) iffound = 1; /* * IPv4-mapped addresses can only be returned by a * Node Information proxy, since they represent * addresses of IPv4-only nodes, which perforce do * not implement this protocol. * [icmp-name-lookups-07, Section 5.4] * So we don't support NI_NODEADDR_FLAG_COMPAT in * this function at this moment. */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; /* we need only unicast addresses */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } addrsofif++; /* count the address */ } IF_ADDR_RUNLOCK(ifp); if (iffound) { *ifpp = ifp; IFNET_RUNLOCK_NOSLEEP(); return (addrsofif); } addrs += addrsofif; } IFNET_RUNLOCK_NOSLEEP(); return (addrs); } static int ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, struct ifnet *ifp0, int resid) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; struct ifnet *ifp_dep = NULL; int copied = 0, allow_deprecated = 0; u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return (0); /* needless to copy */ IFNET_RLOCK_NOSLEEP(); ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet); again: for (; ifp; ifp = TAILQ_NEXT(ifp, if_link)) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && allow_deprecated == 0) { /* * preferred addresses should be put before * deprecated addresses. */ /* record the interface for later search */ if (ifp_dep == NULL) ifp_dep = ifp; continue; } else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && allow_deprecated != 0) continue; /* we now collect deprecated addrs */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec.
*/ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } /* now we can copy the address */ if (resid < sizeof(struct in6_addr) + sizeof(u_int32_t)) { IF_ADDR_RUNLOCK(ifp); /* * We have no room to copy any more addresses. * Set the truncate flag and return. */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * Set the TTL of the address. * The TTL value should be one of the following * according to the specification: * * 1. The remaining lifetime of a DHCP lease on the * address, or * 2. The remaining Valid Lifetime of a prefix from * which the address was derived through Stateless * Autoconfiguration. * * Note that we currently do not support stateful * address configuration by DHCPv6, so the former * case can't happen. */ if (ifa6->ia6_lifetime.ia6t_expire == 0) ltime = ND6_INFINITE_LIFETIME; else { if (ifa6->ia6_lifetime.ia6t_expire > time_uptime) ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_uptime); else ltime = 0; } bcopy(&ltime, cp, sizeof(u_int32_t)); cp += sizeof(u_int32_t); /* copy the address itself */ bcopy(&ifa6->ia_addr.sin6_addr, cp, sizeof(struct in6_addr)); in6_clearscope((struct in6_addr *)cp); /* XXX */ cp += sizeof(struct in6_addr); resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } IF_ADDR_RUNLOCK(ifp); if (ifp0) /* we need to search only on the specified IF */ break; } if (allow_deprecated == 0 && ifp_dep != NULL) { ifp = ifp_dep; allow_deprecated = 1; goto again; } IFNET_RUNLOCK_NOSLEEP(); return (copied); } /* * XXX almost dup'ed code with rip6_input. */ static int icmp6_rip6_input(struct mbuf **mp, int off) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct inpcb *in6p; struct inpcb *last = NULL; struct sockaddr_in6 fromsa; struct icmp6_hdr *icmp6; struct mbuf *opts = NULL; #ifndef PULLDOWN_TEST /* this is assumed to be safe. */ icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { /* m is already reclaimed */ return (IPPROTO_DONE); } #endif /* * XXX: the address may have embedded scope zone ID, which should be * hidden from applications. */ bzero(&fromsa, sizeof(fromsa)); fromsa.sin6_family = AF_INET6; fromsa.sin6_len = sizeof(struct sockaddr_in6); fromsa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&fromsa)) { m_freem(m); return (IPPROTO_DONE); } INP_INFO_RLOCK(&V_ripcbinfo); LIST_FOREACH(in6p, &V_ripcb, inp_list) { if ((in6p->inp_vflag & INP_IPV6) == 0) continue; if (in6p->inp_ip_p != IPPROTO_ICMPV6) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) continue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; INP_RLOCK(in6p); if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) { INP_RUNLOCK(in6p); continue; } if (last != NULL) { struct mbuf *n = NULL; /* * Recent network drivers tend to allocate a single * mbuf cluster, rather than make a couple of * mbufs without clusters. Also, since the IPv6 code * path tries to avoid m_pullup(), it is highly * probable that we still have an mbuf cluster here * even though the necessary length can be stored in an * mbuf's internal buffer.
* Meanwhile, the default size of the receive socket * buffer for raw sockets is not so large. This means * the possibility of packet loss is relatively higher * than before. To avoid this scenario, we copy the * received data to a separate mbuf that does not use * a cluster, if possible. * XXX: it is better to copy the data after stripping * intermediate headers. */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; } else { m_free(n); n = NULL; } } } if (n != NULL || (n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, off); SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked( &last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) { m_freem(opts); } SOCKBUF_UNLOCK( &last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); if (last != NULL) { if (last->inp_flags & INP_CONTROLOPTS) ip6_savecontrol(last, m, &opts); /* strip intermediate headers */ m_adj(m, off); /* avoid using mbuf clusters if possible (see above) */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { struct mbuf *n; n = m_get(M_NOWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; m_freem(m); m = n; } else { m_freem(n); n = NULL; } } } SOCKBUF_LOCK(&last->inp_socket->so_rcv); if (sbappendaddr_locked(&last->inp_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&last->inp_socket->so_rcv); } else sorwakeup_locked(last->inp_socket); INP_RUNLOCK(last); } else { m_freem(m); IP6STAT_DEC(ip6s_delivered); } return IPPROTO_DONE; } /* * Reflect the ip6 packet back to the source. * OFF points to the icmp6 header, counted from the top of the mbuf. */ void icmp6_reflect(struct mbuf *m, size_t off) { struct in6_addr src6, *srcp; struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia = NULL; struct ifnet *outif = NULL; int plen; int type, code, hlim; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { nd6log((LOG_DEBUG, "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n", (u_long)off, (u_long)sizeof(struct ip6_hdr), __FILE__, __LINE__)); goto bad; } /* * If there are extra headers between IPv6 and ICMPv6, strip * off that header first. 
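 * For example, an input chain of IPv6 | hbh | ICMPv6 (off = 48) is
 * rewritten below into IPv6 | ICMPv6: the IPv6 header is saved,
 * m_adj() trims off - sizeof(struct ip6_hdr) bytes from the front,
 * and the saved header is copied back over the result.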
*/ #ifdef DIAGNOSTIC if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN) panic("assumption failed in icmp6_reflect"); #endif if (off > sizeof(struct ip6_hdr)) { size_t l; struct ip6_hdr nip6; l = off - sizeof(struct ip6_hdr); m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6); m_adj(m, l); l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6)); } else /* off == sizeof(struct ip6_hdr) */ { size_t l; l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } } plen = m->m_pkthdr.len - sizeof(struct ip6_hdr); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_nxt = IPPROTO_ICMPV6; icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. */ hlim = 0; srcp = NULL; /* * If the incoming packet was addressed directly to us (i.e. unicast), * use dst as the src for the reply. * The IN6_IFF_NOTREADY case should be VERY rare, but is possible * (for example) when the forwarding procedure encounters an error * on a packet destined to a duplicated address of ours. */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { ia = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */); if (ia != NULL && !(ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { src6 = ia->ia_addr.sin6_addr; srcp = &src6; if (m->m_pkthdr.rcvif != NULL) { /* XXX: This may not be the outgoing interface */ hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; } else hlim = V_ip6_defhlim; } if (ia != NULL) ifa_free(&ia->ia_ifa); } if (srcp == NULL) { int error; struct in6_addr dst6; uint32_t scopeid; /* * This case matches multicasts, our anycast, or unicasts * that we do not own. Select a source address based on the * source address of the erroneous packet. */ in6_splitscope(&ip6->ip6_src, &dst6, &scopeid); error = in6_selectsrc_addr(M_GETFIB(m), &dst6, scopeid, NULL, &src6, &hlim); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", ip6_sprintf(ip6buf, &ip6->ip6_dst), error)); goto bad; } srcp = &src6; } /* * ip6_input() drops a packet if its src is multicast. * So, the src is never multicast.
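 * Hence the swap below (the reply's dst takes the request's src)
 * can never install a multicast destination in the reflected packet.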
*/ ip6->ip6_dst = ip6->ip6_src; ip6->ip6_src = *srcp; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = hlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), plen); /* * XXX option handling */ m->m_flags &= ~(M_BCAST|M_MCAST); ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) icmp6_ifoutstat_inc(outif, type, code); return; bad: m_freem(m); return; } void icmp6_fasttimo(void) { mld_fasttimo(); } void icmp6_slowtimo(void) { mld_slowtimo(); } static const char * icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6, struct in6_addr *tgt6) { static char buf[1024]; char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; char ip6buft[INET6_ADDRSTRLEN]; snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)", ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6), ip6_sprintf(ip6buft, tgt6)); return buf; } void icmp6_redirect_input(struct mbuf *m, int off) { struct ifnet *ifp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_redirect *nd_rd; int icmp6len = ntohs(ip6->ip6_plen); char *lladdr = NULL; int lladdrlen = 0; int is_router; int is_onlink; struct in6_addr src6 = ip6->ip6_src; struct in6_addr redtgt6; struct in6_addr reddst6; union nd_opts ndopts; char ip6buf[INET6_ADDRSTRLEN]; M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: no rcvif", __func__)); ifp = m->m_pkthdr.rcvif; /* XXX if we are router, we don't update route by icmp6 redirect */ if (V_ip6_forwarding) goto freeit; if (!V_icmp6_rediraccept) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); if (nd_rd == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif redtgt6 = nd_rd->nd_rd_target; reddst6 = nd_rd->nd_rd_dst; if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) || in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) { goto freeit; } /* validation */ if (!IN6_IS_ADDR_LINKLOCAL(&src6)) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "must be from linklocal\n", ip6_sprintf(ip6buf, &src6))); goto bad; } if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "hlim=%d (must be 255)\n", ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim)); goto bad; } { /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; in6_splitscope(&reddst6, &kdst, &scopeid); if (fib6_lookup_nh_basic(ifp->if_fib, &kdst, scopeid, 0, 0,&nh6)==0){ if ((nh6.nh_flags & NHF_GATEWAY) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } if (IN6_ARE_ADDR_EQUAL(&src6, &nh6.nh_addr) == 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", ip6_sprintf(ip6buf, &nh6.nh_addr), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } else { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "no route found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } } if (IN6_IS_ADDR_MULTICAST(&reddst6)) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "redirect dst must be unicast: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } is_router = is_onlink = 0; if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) is_router = 1; /* router case */ if (bcmp(&redtgt6, 
&reddst6, sizeof(redtgt6)) == 0) is_onlink = 1; /* on-link destination case */ if (!is_router && !is_onlink) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "neither router case nor onlink case: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } icmp6len -= sizeof(*nd_rd); nd6_option_init(nd_rd + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "%s: invalid ND option, rejected: %s\n", __func__, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "%s: lladdrlen mismatch for %s " "(if %d, icmp6 packet %d): %s\n", __func__, ip6_sprintf(ip6buf, &redtgt6), ifp->if_addrlen, lladdrlen - 2, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } /* Validation passed. */ /* RFC 2461 8.3 */ nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); /* * Install a gateway route in the better-router case or an interface * route in the on-link-destination case. */ { struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; struct sockaddr *gw; int rt_flags; u_int fibnum; bzero(&sdst, sizeof(sdst)); bzero(&ssrc, sizeof(ssrc)); sdst.sin6_family = ssrc.sin6_family = AF_INET6; sdst.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); rt_flags = RTF_HOST; if (is_router) { bzero(&sgw, sizeof(sgw)); sgw.sin6_family = AF_INET6; sgw.sin6_len = sizeof(struct sockaddr_in6); bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); gw = (struct sockaddr *)&sgw; rt_flags |= RTF_GATEWAY; } else gw = ifp->if_addr->ifa_addr; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) in6_rtredirect((struct sockaddr *)&sdst, gw, (struct sockaddr *)NULL, rt_flags, (struct sockaddr *)&ssrc, fibnum); } /* finally update cached route in each socket via pfctlinput */ { struct sockaddr_in6 sdst; bzero(&sdst, sizeof(sdst)); sdst.sin6_family = AF_INET6; sdst.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); } freeit: m_freem(m); return; bad: ICMP6STAT_INC(icp6s_badredirect); m_freem(m); } void icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) { struct ifnet *ifp; /* my outgoing interface */ struct in6_addr *ifp_ll6; struct in6_addr *router_ll6; struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ struct mbuf *m = NULL; /* newly allocated one */ struct m_tag *mtag; struct ip6_hdr *ip6; /* m as struct ip6_hdr */ struct nd_redirect *nd_rd; struct llentry *ln = NULL; size_t maxlen; u_char *p; struct ifnet *outif = NULL; struct sockaddr_in6 src_sa; icmp6_errcount(ND_REDIRECT, 0); /* if we are not router, we don't send icmp6 redirect */ if (!V_ip6_forwarding) goto fail; /* sanity check */ if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) goto fail; /* * Address check: * the source address must identify a neighbor, and * the destination address must not be a multicast address * [RFC 2461, sec 8.2] */ sip6 = mtod(m0, struct ip6_hdr *); bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = sip6->ip6_src; if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) 
goto fail; if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) goto fail; /* what should we do here? */ /* rate limit */ if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0)) goto fail; /* * Since we are going to append up to 1280 bytes (= IPV6_MMTU), * we almost always ask for an mbuf cluster for simplicity. * (MHLEN < IPV6_MMTU is almost always true) */ #if IPV6_MMTU >= MCLBYTES # error assumption failed about IPV6_MMTU and MCLBYTES #endif m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) goto fail; M_SETFIB(m, rt->rt_fibnum); maxlen = M_TRAILINGSPACE(m); maxlen = min(IPV6_MMTU, maxlen); /* just for safety */ if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) { goto fail; } { /* get ip6 linklocal address for ifp(my outgoing interface). */ struct in6_ifaddr *ia; if ((ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY| IN6_IFF_ANYCAST)) == NULL) goto fail; ifp_ll6 = &ia->ia_addr.sin6_addr; /* XXXRW: reference released prematurely. */ ifa_free(&ia->ia_ifa); } /* get ip6 linklocal address for the router. */ if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)rt->rt_gateway; router_ll6 = &sin6->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) router_ll6 = (struct in6_addr *)NULL; } else router_ll6 = (struct in6_addr *)NULL; /* ip6 */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; /* ip6->ip6_src must be linklocal addr for my outgoing if. */ bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); /* ND Redirect */ nd_rd = (struct nd_redirect *)(ip6 + 1); nd_rd->nd_rd_type = ND_REDIRECT; nd_rd->nd_rd_code = 0; nd_rd->nd_rd_reserved = 0; if (rt->rt_flags & RTF_GATEWAY) { /* * nd_rd->nd_rd_target must be a link-local address in * better router cases. */ if (!router_ll6) goto fail; bcopy(router_ll6, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } else { /* make sure redtgt == reddst */ bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } p = (u_char *)(nd_rd + 1); if (!router_ll6) goto nolladdropt; { /* target lladdr option */ int len; struct nd_opt_hdr *nd_opt; char *lladdr; IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(router_ll6, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) goto nolladdropt; len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ if (len + (p - (u_char *)ip6) > maxlen) goto nolladdropt; if (ln->la_flags & LLE_VALID) { nd_opt = (struct nd_opt_hdr *)p; nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); bcopy(ln->ll_addr, lladdr, ifp->if_addrlen); p += len; } } nolladdropt: if (ln != NULL) LLE_RUNLOCK(ln); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* just to be safe */ #ifdef M_DECRYPTED /*not openbsd*/ if (m0->m_flags & M_DECRYPTED) goto noredhdropt; #endif if (p - (u_char *)ip6 > maxlen) goto noredhdropt; { /* redirected header option */ int len; struct nd_opt_rd_hdr *nd_opt_rh; /* * compute the maximum size for icmp6 redirect header option. * XXX room for auth header? */ len = maxlen - (p - (u_char *)ip6); len &= ~7; /* This is just for simplicity. 
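 * A worked example, assuming an Ethernet outgoing interface: the
 * IPv6 header (40 bytes), the redirect header (40) and the target
 * lladdr option (8) put p 88 bytes into the packet, so maxlen =
 * IPV6_MMTU = 1280 leaves len = 1192, already a multiple of 8.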
*/ if (m0->m_pkthdr.len != m0->m_len) { if (m0->m_next) { m_freem(m0->m_next); m0->m_next = NULL; } m0->m_pkthdr.len = m0->m_len; } /* * The redirected header option spec (RFC2461 4.6.3) says nothing * about the padding/truncation rules for the original IP packet. * From the discussion on IPv6imp in Feb 1999, * the consensus was: * - "attach as much as possible" is the goal * - pad if not aligned (original size can be guessed by * original ip6 header) * The following code adds the padding if it is simple enough, * and truncates if not. */ if (m0->m_next || m0->m_pkthdr.len != m0->m_len) panic("assumption failed in %s:%d", __FILE__, __LINE__); if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) { /* not enough room, truncate */ m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } else { /* enough room, pad or truncate */ size_t extra; extra = m0->m_pkthdr.len % 8; if (extra) { /* pad if easy enough, truncate if not */ if (8 - extra <= M_TRAILINGSPACE(m0)) { /* pad */ m0->m_len += (8 - extra); m0->m_pkthdr.len += (8 - extra); } else { /* truncate */ m0->m_pkthdr.len -= extra; m0->m_len -= extra; } } len = m0->m_pkthdr.len + sizeof(*nd_opt_rh); m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } nd_opt_rh = (struct nd_opt_rd_hdr *)p; bzero(nd_opt_rh, sizeof(*nd_opt_rh)); nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER; nd_opt_rh->nd_opt_rh_len = len >> 3; p += sizeof(*nd_opt_rh); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* connect m0 to m */ m_tag_delete_chain(m0, NULL); m0->m_flags &= ~M_PKTHDR; m->m_next = m0; m->m_pkthdr.len = m->m_len + m0->m_len; m0 = NULL; } noredhdropt:; if (m0) { m_freem(m0); m0 = NULL; } /* XXX: clear embedded link IDs in the inner header */ in6_clearscope(&sip6->ip6_src); in6_clearscope(&sip6->ip6_dst); in6_clearscope(&nd_rd->nd_rd_target); in6_clearscope(&nd_rd->nd_rd_dst); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); nd_rd->nd_rd_cksum = 0; nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen)); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto fail; *(unsigned short *)(mtag + 1) = nd_rd->nd_rd_type; m_tag_prepend(m, mtag); } /* send the packet to outside... */ ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_redirect); } ICMP6STAT_INC(icp6s_outhist[ND_REDIRECT]); return; fail: if (m) m_freem(m); if (m0) m_freem(m0); } /* * ICMPv6 socket option processing.
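 *
 * A minimal, illustrative userland counterpart of the ICMP6_FILTER
 * handling below (RFC 3542 names; "s" is a hypothetical raw ICMPv6
 * socket):
 *
 *	struct icmp6_filter flt;
 *	int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
 *	ICMP6_FILTER_SETBLOCKALL(&flt);
 *	ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY, &flt);
 *	setsockopt(s, IPPROTO_ICMPV6, ICMP6_FILTER, &flt, sizeof(flt));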
*/ int icmp6_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0; int optlen; struct inpcb *inp = sotoinpcb(so); int level, op, optname; if (sopt) { level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; } else level = op = optname = optlen = 0; if (level != IPPROTO_ICMPV6) { return EINVAL; } switch (op) { case PRCO_SETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; if (optlen != sizeof(ic6f)) { error = EMSGSIZE; break; } error = sooptcopyin(sopt, &ic6f, optlen, optlen); if (error == 0) { INP_WLOCK(inp); *inp->in6p_icmp6filt = ic6f; INP_WUNLOCK(inp); } break; } default: error = ENOPROTOOPT; break; } break; case PRCO_GETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter ic6f; INP_RLOCK(inp); ic6f = *inp->in6p_icmp6filt; INP_RUNLOCK(inp); error = sooptcopyout(sopt, &ic6f, sizeof(ic6f)); break; } default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Perform rate limit check. * Returns 0 if it is okay to send the icmp6 packet. * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate * limitation. * * XXX per-destination/type check necessary? * * dst - not used at this moment * type - not used at this moment * code - not used at this moment */ static int icmp6_ratelimit(const struct in6_addr *dst, const int type, const int code) { int ret; ret = 0; /* okay to send */ /* PPS limit */ if (!ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count, V_icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; } return ret; } Index: projects/clang500-import/sys/netinet6/ip6_output.c =================================================================== --- projects/clang500-import/sys/netinet6/ip6_output.c (revision 319250) +++ projects/clang500-import/sys/netinet6/ip6_output.c (revision 319251) @@ -1,3108 +1,3108 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ratelimit.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SCTP #include #include #endif #include #include #ifdef FLOWTABLE #include #endif extern int in6_mcast_loop; struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options"); static int ip6_pcbopt(int, u_char *, int, struct ip6_pktopts **, struct ucred *, int); static int ip6_pcbopts(struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *); static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *); static int ip6_setpktopt(int, u_char *, int, struct ip6_pktopts *, struct ucred *, int, int, int); static int ip6_copyexthdr(struct mbuf **, caddr_t, int); static int ip6_insertfraghdr(struct mbuf *, struct mbuf *, int, struct ip6_frag **); static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu(struct route_in6 *, int, struct ifnet *, const struct in6_addr *, u_long *, int *, u_int, u_int); static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, u_long *, int *, u_int); static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); /* * Make an extension header from option data. 
hp is the source, and * mp is the destination. */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (/*CONSTCOND*/ 0) /* * Form a chain of extension headers. * m is the extension header mbuf * mp is the previous mbuf in the chain * p is the next header * i is the type of option. */ #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (/*CONSTCOND*/ 0) void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset) { u_short csum; csum = in_cksum_skip(m, offset + plen, offset); if (m->m_pkthdr.csum_flags & CSUM_UDP_IPV6 && csum == 0) csum = 0xffff; offset += m->m_pkthdr.csum_data; /* checksum offset */ if (offset + sizeof(u_short) > m->m_len) { printf("%s: delayed m_pullup, m->len: %d plen %u off %u " "csum_flags=%b\n", __func__, m->m_len, plen, offset, (int)m->m_pkthdr.csum_flags, CSUM_BITS); /* * XXX this should not happen, but if it does, the correct * behavior may be to insert the checksum in the appropriate * next mbuf in the chain. */ return; } *(u_short *)(m->m_data + offset) = csum; } int ip6_fragment(struct ifnet *ifp, struct mbuf *m0, int hlen, u_char nextproto, int fraglen , uint32_t id) { struct mbuf *m, **mnext, *m_frgpart; struct ip6_hdr *ip6, *mhip6; struct ip6_frag *ip6f; int off; int error; int tlen = m0->m_pkthdr.len; KASSERT((fraglen % 8 == 0), ("Fragment length must be a multiple of 8")); m = m0; ip6 = mtod(m, struct ip6_hdr *); mnext = &m->m_nextpkt; for (off = hlen; off < tlen; off += fraglen) { m = m_gethdr(M_NOWAIT, MT_DATA); if (!m) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { IP6STAT_INC(ip6s_odropped); return (error); } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + fraglen >= tlen) fraglen = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(fraglen + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copym(m0, off, fraglen, M_NOWAIT)) == NULL) { IP6STAT_INC(ip6s_odropped); return (ENOBUFS); } m_cat(m, m_frgpart); m->m_pkthdr.len = fraglen + hlen + sizeof(*ip6f); m->m_pkthdr.fibnum = m0->m_pkthdr.fibnum; m->m_pkthdr.rcvif = NULL; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; IP6STAT_INC(ip6s_ofragments); in6_ifstat_inc(ifp, ifs6_out_fragcreat); } return (0); } /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route_in6 ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_mtu. 
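 *
 * A usage sketch: a caller that transmits repeatedly may pass a cached
 * struct route_in6 here, so only the first call pays for the route
 * lookup; a caller that passes ro == NULL gets a stack-local route
 * that is looked up and freed within this single call.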
* * ifpp - XXX: just for statistics */ /* * XXX TODO: no flowid is assigned for outbound flows? */ int ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct route_in6 *ro, int flags, struct ip6_moptions *im6o, struct ifnet **ifpp, struct inpcb *inp) { struct ip6_hdr *ip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; struct mbuf *mprev = NULL; int hlen, tlen, len; struct route_in6 ip6route; struct rtentry *rt = NULL; struct sockaddr_in6 *dst, src_sa, dst_sa; struct in6_addr odst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; int alwaysfrag, dontfrag; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr src0, dst0; u_int32_t zone; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int sw_csum, tso; int needfiblookup; uint32_t fibnum; struct m_tag *fwd_tag = NULL; uint32_t id; if (inp != NULL) { INP_LOCK_ASSERT(inp); M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { /* unconditionally set flowid */ m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * IPSec checking which handles several cases. * FAST IPSEC: We re-injected the packet. * XXX: need scope argument. */ if (IPSEC_ENABLED(ipv6)) { if ((error = IPSEC_OUTPUT(ipv6, m, inp)) != 0) { if (error == EINPROGRESS) error = 0; goto done; } } #endif /* IPSEC */ bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ if (opt->ip6po_rthdr) { /* * Destination options header(1st part) * This only makes sense with a routing header. * See Section 9.2 of RFC 3542. * Disabling this part just for MIP6 convenience is * a bad idea. We need to think carefully about a * way to make the advanced API coexist with MIP6 * options, which might automatically be inserted in * the kernel. */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); } /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here (done in ip6_ipsec_output) */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If there is at least one extension header, * separate IP6 header from the payload. */ if (optlen && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. 
Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ u_char *nexthdrp = &ip6->ip6_nxt; mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point to the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). */ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); /* * If there is a routing header, discard the packet. */ if (exthdrs.ip6e_rthdr) { error = EINVAL; goto bad; } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_UNSPECSRC) == 0) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; IP6STAT_INC(ip6s_badscope); goto bad; } IP6STAT_INC(ip6s_localout); /* * Route packet. */ if (ro == NULL) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET6, m, (struct route *)ro); #endif fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); again: /* * if specified, try to fill in the traffic class field. * do not override if a non-zero value is already set. * we check the diffserv field and the ecn field separately. */ if (opt && opt->ip6po_tclass >= 0) { int mask = 0; if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0) mask |= 0xfc; if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0) mask |= 0x03; if (mask != 0) ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20); } /* fill in or override the hop limit field, if necessary. */ if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (im6o != NULL) ip6->ip6_hlim = im6o->im6o_multicast_hlim; else ip6->ip6_hlim = V_ip6_defmcasthlim; } /* * Validate route against routing table additions; * a better/more specific route might have been added. * Make sure address family is set in route. */ if (inp) { ro->ro_dst.sin6_family = AF_INET6; RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum); } if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) && ro->ro_dst.sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = NULL; if (fwd_tag == NULL) { bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute_fib(&dst_sa, opt, im6o, ro, &ifp, &rt, fibnum); if (error != 0) { if (ifp != NULL) in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } } if (rt == NULL) { /* * If in6_selectroute() does not return a route entry, * dst may not have been updated.
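 * (for example, a multicast destination may be resolved via im6o to
 * an outgoing interface alone), so the destination is copied into
 * *dst explicitly just below.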
*/ *dst = dst_sa; /* XXX */ } /* * then rt (for unicast) and ifp must be non-NULL valid values. */ if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); } if (rt != NULL) { ia = (struct in6_ifaddr *)(rt->rt_ifa); counter_u64_add(rt->rt_pksent, 1); } /* * The outgoing interface must be in the zone of source and * destination addresses. */ origifp = ifp; src0 = ip6->ip6_src; if (in6_setscope(&src0, origifp, &zone)) goto badscope; bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id) goto badscope; dst0 = ip6->ip6_dst; if (in6_setscope(&dst0, origifp, &zone)) goto badscope; /* re-initialize to be sure */ bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) { goto badscope; } /* We should use ia_ifp to support the case of * sending packets to an address of our own. */ if (ia != NULL && ia->ia_ifp) ifp = ia->ia_ifp; /* scope check is done. */ goto routefound; badscope: IP6STAT_INC(ip6s_badscope); in6_ifstat_inc(origifp, ifs6_out_discard); if (error == 0) error = EHOSTUNREACH; /* XXX */ goto bad; routefound: if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (opt && opt->ip6po_nextroute.ro_rt) { /* * The nexthop is explicitly specified by the * application. We assume the next hop is an IPv6 * address. */ dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; } else if ((rt->rt_flags & RTF_GATEWAY)) dst = (struct sockaddr_in6 *)rt->rt_gateway; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ } else { m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if (!(ifp->if_flags & IFF_MULTICAST)) { IP6STAT_INC(ip6s_noroute); in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } if ((im6o == NULL && in6_mcast_loop) || (im6o && im6o->im6o_multicast_loop)) { /* * Loop back multicast datagram if not expressly * forbidden to do so, even if we have not joined * the address; protocols will filter it later, * thus deferring a hash lookup and lock acquisition * at the expense of an m_copym(). */ ip6_mloopback(ifp, m); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (V_ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { /* * XXX: ip6_mforward expects that rcvif is NULL * when it is called from the originating path. * However, it may not always be the case. */ m->m_pkthdr.rcvif = NULL; if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. 
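 * Interface-local groups (ff01::/16) must never appear on the wire
 * either; they are caught by the IN6_IS_ADDR_MC_INTFACELOCAL() test
 * below.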
*/ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) { m_freem(m); goto done; } } /* * Fill the outgoing interface to tell the upper layer * to increment per-interface statistics. */ if (ifpp) *ifpp = ifp; /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0) goto bad; /* * The caller of this function may specify to use the minimum MTU * in some cases. * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU * setting. The logic is a bit complicated; by default, unicast * packets will follow path MTU while multicast packets will be sent at * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets * including unicast ones will be sent at the minimum MTU. Multicast * packets will always be sent at the minimum MTU unless * IP6PO_MINMTU_DISABLE is explicitly specified. * See RFC 3542 for more details. */ if (mtu > IPV6_MMTU) { if ((flags & IPV6_MINMTU)) mtu = IPV6_MMTU; else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL) mtu = IPV6_MMTU; else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && (opt == NULL || opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) { mtu = IPV6_MMTU; } } /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) */ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy; /* XXX unused */ u_int32_t plen = 0; /* XXX: ip6_process will check the value */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not contiguous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 header and the hop-by-hop options header to be * contiguous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy, &plen) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet6_pfil_hook)) goto passout; odst = ip6->ip6_dst; /* Run through list of hooks for output packets. */ error = pfil_run_hooks(&V_inet6_pfil_hook, &m, ifp, PFIL_OUT, inp); if (error != 0 || m == NULL) goto done; /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); needfiblookup = 0; /* See if destination IP address was changed by packet filter. */ if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) { m->m_flags |= M_SKIP_FIREWALL; /* If the destination is now ourselves, drop to ip6_input(). */ if (in6_localip(&ip6->ip6_dst)) { m->m_flags |= M_FASTFWD_OURS; if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } else { RO_RTFREE(ro); needfiblookup = 1; /* Redo the routing table lookup.
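 * A packet filter rewrote ip6_dst to a non-local address, so the
 * cached route and llentry are stale; once needfiblookup is set,
 * the "goto again" below reruns route selection for the new
 * destination.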
*/ if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = NULL; } } /* See if fib was changed by packet filter. */ if (fibnum != M_GETFIB(m)) { m->m_flags |= M_SKIP_FIREWALL; fibnum = M_GETFIB(m); RO_RTFREE(ro); needfiblookup = 1; if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ ro->ro_lle = NULL; } if (needfiblookup) goto again; /* See if local, if yes, send it to netisr. */ if (m->m_flags & M_FASTFWD_OURS) { if (m->m_pkthdr.rcvif == NULL) m->m_pkthdr.rcvif = V_loif; if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { m->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xffff; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; #endif error = netisr_queue(NETISR_IPV6, m); goto done; } /* Or forward to some other address? */ if ((m->m_flags & M_IP6_NEXTHOP) && (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { dst = (struct sockaddr_in6 *)&ro->ro_dst; bcopy((fwd_tag+1), &dst_sa, sizeof(struct sockaddr_in6)); m->m_flags |= M_SKIP_FIREWALL; m->m_flags &= ~M_IP6_NEXTHOP; m_tag_delete(m, fwd_tag); goto again; } passout: /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. * * the logic here is rather complex: * 1: normal case (dontfrag == 0, alwaysfrag == 0) * 1-a: send as is if tlen <= path mtu * 1-b: fragment if tlen > path mtu * * 2: if user asks us not to fragment (dontfrag == 1) * 2-a: send as is if tlen <= interface mtu * 2-b: error if tlen > interface mtu * * 3: if we always need to attach fragment header (alwaysfrag == 1) * always fragment * * 4: if dontfrag == 1 && alwaysfrag == 1 * error, as we cannot handle this conflicting request */ sw_csum = m->m_pkthdr.csum_flags; if (!hdrsplit) { tso = ((sw_csum & ifp->if_hwassist & CSUM_TSO) != 0) ? 1 : 0; sw_csum &= ~ifp->if_hwassist; } else tso = 0; /* * If we added extension headers, we will not do TSO and calculate the * checksums ourselves for now. * XXX-BZ Need a framework to know when the NIC can handle it, even * with ext. hdrs. */ if (sw_csum & CSUM_DELAY_DATA_IPV6) { sw_csum &= ~CSUM_DELAY_DATA_IPV6; in6_delayed_cksum(m, plen, sizeof(struct ip6_hdr)); } #ifdef SCTP if (sw_csum & CSUM_SCTP_IPV6) { sw_csum &= ~CSUM_SCTP_IPV6; sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); } #endif m->m_pkthdr.csum_flags &= ifp->if_hwassist; tlen = m->m_pkthdr.len; if ((opt && (opt->ip6po_flags & IP6PO_DONTFRAG)) || tso) dontfrag = 1; else dontfrag = 0; if (dontfrag && alwaysfrag) { /* case 4 */ /* conflicting request - can't transmit */ error = EMSGSIZE; goto bad; } if (dontfrag && tlen > IN6_LINKMTU(ifp) && !tso) { /* case 2-b */ /* * Even if the DONTFRAG option is specified, we cannot send the * packet when the data length is larger than the MTU of the * outgoing interface. * Notify the error by sending IPV6_PATHMTU ancillary data if * application wanted to know the MTU value. Also return an * error code (this is not described in the API spec). */ if (inp != NULL) ip6_notify_pmtu(inp, &dst_sa, (u_int32_t)mtu); error = EMSGSIZE; goto bad; } /* * transmit packet without fragmentation */ if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */ struct in6_ifaddr *ia6; ip6 = mtod(m, struct ip6_hdr *); ia6 = in6_ifawithifp(ifp, &ip6->ip6_src); if (ia6) { /* Record statistics for this interface address. 
*/ counter_u64_add(ia6->ia_ifa.ifa_opackets, 1); counter_u64_add(ia6->ia_ifa.ifa_obytes, m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } #ifdef RATELIMIT if (inp != NULL) { if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) in_pcboutput_txrtlmt(inp, ifp, m); /* stamp send tag on mbuf */ m->m_pkthdr.snd_tag = inp->inp_snd_tag; } else { m->m_pkthdr.snd_tag = NULL; } #endif error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); #ifdef RATELIMIT /* check for route change */ if (error == EAGAIN) in_pcboutput_eagain(inp); #endif goto done; } /* * try to fragment the packet. case 1-b and 3 */ if (mtu < IPV6_MMTU) { /* path MTU cannot be less than IPV6_MMTU */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } /* * If the interface will not calculate checksums on * fragmented packets, then do it here. * XXX-BZ handle the hw offloading case. Need flags. */ if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { in6_delayed_cksum(m, plen, hlen); m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; } #ifdef SCTP if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { sctp_delayed_cksum(m, hlen); m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; } #endif /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through the length of the segment after the first * fragment; make a new header for each part, copy its data * and link it onto the chain. */ m0 = m; id = htonl(ip6_randomid()); if ((error = ip6_fragment(ifp, m, hlen, nextproto, len, id))) goto sendorfree; in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbage. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address.
*/ if (ia) { counter_u64_add(ia->ia_ifa.ifa_opackets, 1); counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } #ifdef RATELIMIT if (inp != NULL) { if (inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) in_pcboutput_txrtlmt(inp, ifp, m); /* stamp send tag on mbuf */ m->m_pkthdr.snd_tag = inp->inp_snd_tag; } else { m->m_pkthdr.snd_tag = NULL; } #endif error = nd6_output_ifp(ifp, origifp, m, dst, (struct route *)ro); #ifdef RATELIMIT /* check for route change */ if (error == EAGAIN) in_pcboutput_eagain(inp); #endif } else m_freem(m); } if (error == 0) IP6STAT_INC(ip6s_fragmented); done: if (ro == &ip6route) RO_RTFREE(ro); return (error); freehdrs: m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */ m_freem(exthdrs.ip6e_dest1); m_freem(exthdrs.ip6e_rthdr); m_freem(exthdrs.ip6e_dest2); /* FALLTHROUGH */ bad: if (m) m_freem(m); goto done; } static int ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen) { struct mbuf *m; if (hlen > MCLBYTES) return (ENOBUFS); /* XXX */ if (hlen > MLEN) m = m_getcl(M_NOWAIT, MT_DATA, 0); else m = m_get(M_NOWAIT, MT_DATA); if (m == NULL) return (ENOBUFS); m->m_len = hlen; if (hdr) bcopy(hdr, mtod(m, caddr_t), hlen); *mp = m; return (0); } /* * Insert jumbo payload option. */ static int ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen) { struct mbuf *mopt; u_char *optbuf; u_int32_t v; #define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */ /* * If there is no hop-by-hop options header, allocate new one. * If there is one but it doesn't have enough space to store the * jumbo payload option, allocate a cluster to store the whole options. * Otherwise, use it to store the options. */ if (exthdrs->ip6e_hbh == NULL) { mopt = m_get(M_NOWAIT, MT_DATA); if (mopt == NULL) return (ENOBUFS); mopt->m_len = JUMBOOPTLEN; optbuf = mtod(mopt, u_char *); optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */ exthdrs->ip6e_hbh = mopt; } else { struct ip6_hbh *hbh; mopt = exthdrs->ip6e_hbh; if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) { /* * XXX assumption: * - exthdrs->ip6e_hbh is not referenced from places * other than exthdrs. * - exthdrs->ip6e_hbh is not an mbuf chain. */ int oldoptlen = mopt->m_len; struct mbuf *n; /* * XXX: give up if the whole (new) hbh header does * not fit even in an mbuf cluster. */ if (oldoptlen + JUMBOOPTLEN > MCLBYTES) return (ENOBUFS); /* * As a consequence, we must always prepare a cluster * at this point. */ n = m_getcl(M_NOWAIT, MT_DATA, 0); if (n == NULL) return (ENOBUFS); n->m_len = oldoptlen + JUMBOOPTLEN; bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t), oldoptlen); optbuf = mtod(n, caddr_t) + oldoptlen; m_freem(mopt); mopt = exthdrs->ip6e_hbh = n; } else { optbuf = mtod(mopt, u_char *) + mopt->m_len; mopt->m_len += JUMBOOPTLEN; } optbuf[0] = IP6OPT_PADN; optbuf[1] = 1; /* * Adjust the header length according to the pad and * the jumbo payload option. */ hbh = mtod(mopt, struct ip6_hbh *); hbh->ip6h_len += (JUMBOOPTLEN >> 3); } /* fill in the option. */ optbuf[2] = IP6OPT_JUMBO; optbuf[3] = 4; v = (u_int32_t)htonl(plen + JUMBOOPTLEN); bcopy(&v, &optbuf[4], sizeof(u_int32_t)); /* finally, adjust the packet header length */ exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN; return (0); #undef JUMBOOPTLEN } /* * Insert fragment header and copy unfragmentable header portions. 
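 * E.g. with hlen = 48 (the IPv6 header plus an 8-byte hop-by-hop
 * header), the 8 unfragmentable bytes past the IPv6 header are
 * copied from m0 into each fragment before the ip6_frag header is
 * placed behind them.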
*/ static int ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen, struct ip6_frag **frghdrp) { struct mbuf *n, *mlast; if (hlen > sizeof(struct ip6_hdr)) { n = m_copym(m0, sizeof(struct ip6_hdr), hlen - sizeof(struct ip6_hdr), M_NOWAIT); if (n == NULL) return (ENOBUFS); m->m_next = n; } else n = m; /* Search for the last mbuf of unfragmentable part. */ for (mlast = n; mlast->m_next; mlast = mlast->m_next) ; if (M_WRITABLE(mlast) && M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) { /* use the trailing space of the last mbuf for the fragment hdr */ *frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) + mlast->m_len); mlast->m_len += sizeof(struct ip6_frag); m->m_pkthdr.len += sizeof(struct ip6_frag); } else { /* allocate a new mbuf for the fragment header */ struct mbuf *mfrg; mfrg = m_get(M_NOWAIT, MT_DATA); if (mfrg == NULL) return (ENOBUFS); mfrg->m_len = sizeof(struct ip6_frag); *frghdrp = mtod(mfrg, struct ip6_frag *); mlast->m_next = mfrg; } return (0); } /* * Calculates IPv6 path mtu for destination @dst. * Resulting MTU is stored in @mtup. * * Returns 0 on success. */ static int ip6_getpmtu_ctl(u_int fibnum, const struct in6_addr *dst, u_long *mtup) { struct nhop6_extended nh6; struct in6_addr kdst; uint32_t scopeid; struct ifnet *ifp; u_long mtu; int error; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_ext(fibnum, &kdst, scopeid, NHR_REF, 0, &nh6) != 0) return (EHOSTUNREACH); ifp = nh6.nh_ifp; mtu = nh6.nh_mtu; error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0); fib6_free_nh_ext(fibnum, &nh6); return (error); } /* * Calculates IPv6 path MTU for @dst based on transmit @ifp, * and cached data in @ro_pmtu. * MTU from (successful) route lookup is saved (along with dst) * inside @ro_pmtu to avoid subsequent route lookups after packet * filter processing. * * Stores mtu and always-frag value into @mtup and @alwaysfragp. * Returns 0 on success. */ static int ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup, int *alwaysfragp, u_int fibnum, u_int proto) { struct nhop6_basic nh6; struct in6_addr kdst; uint32_t scopeid; struct sockaddr_in6 *sa6_dst; u_long mtu; mtu = 0; if (do_lookup) { /* * Here ro_pmtu has final destination address, while * ro might represent immediate destination. * Use ro_pmtu destination since mtu might differ. */ sa6_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (!IN6_ARE_ADDR_EQUAL(&sa6_dst->sin6_addr, dst)) ro_pmtu->ro_mtu = 0; if (ro_pmtu->ro_mtu == 0) { bzero(sa6_dst, sizeof(*sa6_dst)); sa6_dst->sin6_family = AF_INET6; sa6_dst->sin6_len = sizeof(struct sockaddr_in6); sa6_dst->sin6_addr = *dst; in6_splitscope(dst, &kdst, &scopeid); if (fib6_lookup_nh_basic(fibnum, &kdst, scopeid, 0, 0, &nh6) == 0) ro_pmtu->ro_mtu = nh6.nh_mtu; } mtu = ro_pmtu->ro_mtu; } if (ro_pmtu->ro_rt) mtu = ro_pmtu->ro_rt->rt_mtu; return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto)); } /* * Calculate MTU based on transmit @ifp, route mtu @rt_mtu and * hostcache data for @dst. * Stores mtu and always-frag value into @mtup and @alwaysfragp. * * Returns 0 on success. 
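 *
 * Selection sketch: a TCP hostcache MTU (consulted only for
 * protocols other than TCP) caps rt_mtu when both are known;
 * otherwise rt_mtu, then the link MTU, is used. A result below
 * IPV6_MMTU is raised back to IPV6_MMTU with the always-frag flag
 * set.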
 */
static int
ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu,
    u_long *mtup, int *alwaysfragp, u_int proto)
{
    u_long mtu = 0;
    int alwaysfrag = 0;
    int error = 0;

    if (rt_mtu > 0) {
        u_int32_t ifmtu;
        struct in_conninfo inc;

        bzero(&inc, sizeof(inc));
        inc.inc_flags |= INC_ISIPV6;
        inc.inc6_faddr = *dst;

        ifmtu = IN6_LINKMTU(ifp);

        /* TCP is known to react to pmtu changes so skip hc */
        if (proto != IPPROTO_TCP)
            mtu = tcp_hc_getmtu(&inc);

        if (mtu)
            mtu = min(mtu, rt_mtu);
        else
            mtu = rt_mtu;
        if (mtu == 0)
            mtu = ifmtu;
        else if (mtu < IPV6_MMTU) {
            /*
             * RFC 2460, section 5, last paragraph:
             * if we have recorded an ICMPv6 "packet too big"
             * message with mtu < IPV6_MMTU, transmit packets
             * sized IPV6_MMTU or smaller, with a fragment
             * header attached.
             * (The fragment header is needed regardless of the
             * packet size, for translators to identify packets.)
             */
            alwaysfrag = 1;
            mtu = IPV6_MMTU;
        }
    } else if (ifp) {
        mtu = IN6_LINKMTU(ifp);
    } else
        error = EHOSTUNREACH; /* XXX */

    *mtup = mtu;
    if (alwaysfragp)
        *alwaysfragp = alwaysfrag;
    return (error);
}

/*
 * IP6 socket option processing.
 */
int
ip6_ctloutput(struct socket *so, struct sockopt *sopt)
{
    int optdatalen, uproto;
    void *optdata;
    struct inpcb *in6p = sotoinpcb(so);
    int error, optval;
    int level, op, optname;
    int optlen;
    struct thread *td;
#ifdef RSS
    uint32_t rss_bucket;
    int retval;
#endif

    /*
     * Don't use more than a quarter of mbuf clusters.  N.B.:
     * nmbclusters is an int, but nmbclusters * MCLBYTES may overflow
     * on LP64 architectures, so cast to u_long to avoid undefined
     * behavior.  ILP32 architectures cannot have nmbclusters
     * large enough to overflow for other reasons.
     */
#define IPV6_PKTOPTIONS_MBUF_LIMIT  ((u_long)nmbclusters * MCLBYTES / 4)

    level = sopt->sopt_level;
    op = sopt->sopt_dir;
    optname = sopt->sopt_name;
    optlen = sopt->sopt_valsize;
    td = sopt->sopt_td;
    error = 0;
    optval = 0;
    uproto = (int)so->so_proto->pr_protocol;

    if (level != IPPROTO_IPV6) {
        error = EINVAL;
        if (sopt->sopt_level == SOL_SOCKET &&
            sopt->sopt_dir == SOPT_SET) {
            switch (sopt->sopt_name) {
            case SO_REUSEADDR:
                INP_WLOCK(in6p);
                if ((so->so_options & SO_REUSEADDR) != 0)
                    in6p->inp_flags2 |= INP_REUSEADDR;
                else
                    in6p->inp_flags2 &= ~INP_REUSEADDR;
                INP_WUNLOCK(in6p);
                error = 0;
                break;
            case SO_REUSEPORT:
                INP_WLOCK(in6p);
                if ((so->so_options & SO_REUSEPORT) != 0)
                    in6p->inp_flags2 |= INP_REUSEPORT;
                else
                    in6p->inp_flags2 &= ~INP_REUSEPORT;
                INP_WUNLOCK(in6p);
                error = 0;
                break;
            case SO_SETFIB:
                INP_WLOCK(in6p);
                in6p->inp_inc.inc_fibnum = so->so_fibnum;
                INP_WUNLOCK(in6p);
                error = 0;
                break;
            case SO_MAX_PACING_RATE:
#ifdef RATELIMIT
                INP_WLOCK(in6p);
                in6p->inp_flags2 |= INP_RATE_LIMIT_CHANGED;
                INP_WUNLOCK(in6p);
                error = 0;
#else
                error = EOPNOTSUPP;
#endif
                break;
            default:
                break;
            }
        }
    } else {        /* level == IPPROTO_IPV6 */
        switch (op) {

        case SOPT_SET:
            switch (optname) {
            case IPV6_2292PKTOPTIONS:
#ifdef IPV6_PKTOPTIONS
            case IPV6_PKTOPTIONS:
#endif
            {
                struct mbuf *m;

                if (optlen > IPV6_PKTOPTIONS_MBUF_LIMIT) {
                    printf("ip6_ctloutput: mbuf limit hit\n");
                    error = ENOBUFS;
                    break;
                }

                error = soopt_getm(sopt, &m); /* XXX */
                if (error != 0)
                    break;
                error = soopt_mcopyin(sopt, m); /* XXX */
                if (error != 0)
                    break;
                error = ip6_pcbopts(&in6p->in6p_outputopts,
                    m, so, sopt);
                m_freem(m); /* XXX */
                break;
            }

            /*
             * Use of some Hop-by-Hop options or some
             * Destination options might require special
             * privilege.  That is, normal applications
             * (without special privilege) might be forbidden
             * from setting certain options in outgoing packets,
             * and might never see certain options in received
             * packets.
[RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. */ case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) break; } /* FALLTHROUGH */ case IPV6_UNICAST_HOPS: case IPV6_HOPLIMIT: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_RECVTCLASS: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RECVRSSBUCKETID: #endif case IPV6_V6ONLY: case IPV6_AUTOFLOWLABEL: case IPV6_ORIGDSTADDR: case IPV6_BINDANY: case IPV6_BINDMULTI: #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: #endif if (optname == IPV6_BINDANY && td != NULL) { error = priv_check(td, PRIV_NETINET_BINDANY); if (error) break; } if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->inp_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ INP_WLOCK(in6p); \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTSET2292(bit) \ do { \ INP_WLOCK(in6p); \ in6p->inp_flags |= IN6P_RFC2292; \ if (optval) \ in6p->inp_flags |= (bit); \ else \ in6p->inp_flags &= ~(bit); \ INP_WUNLOCK(in6p); \ } while (/*CONSTCOND*/ 0) #define OPTBIT(bit) (in6p->inp_flags & (bit) ? 1 : 0) #define OPTSET2(bit, val) do { \ INP_WLOCK(in6p); \ if (val) \ in6p->inp_flags2 |= bit; \ else \ in6p->inp_flags2 &= ~bit; \ INP_WUNLOCK(in6p); \ } while (0) #define OPTBIT2(bit) (in6p->inp_flags2 & (bit) ? 1 : 0) case IPV6_RECVPKTINFO: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: { struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } optp = &in6p->in6p_outputopts; error = ip6_pcbopt(IPV6_HOPLIMIT, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_RECVHOPLIMIT: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPLIMIT); break; case IPV6_RECVHOPOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDRDSTOPTS); break; case IPV6_RECVRTHDR: /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: /* * We ignore this option for TCP * sockets. * (RFC3542 leaves this case * unspecified.) */ if (uproto != IPPROTO_TCP) OPTSET(IN6P_MTU); break; case IPV6_RECVFLOWID: OPTSET2(INP_RECVFLOWID, optval); break; #ifdef RSS case IPV6_RECVRSSBUCKETID: OPTSET2(INP_RECVRSSBUCKETID, optval); break; #endif case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. 
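                 * (The check below rejects the option once the socket
                 * has a local port or a bound, non-unspecified local
                 * address.)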
*/ if (in6p->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->inp_vflag &= ~INP_IPV4; else in6p->inp_vflag |= INP_IPV4; break; case IPV6_RECVTCLASS: /* cannot mix with RFC2292 XXX */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } OPTSET(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: OPTSET(IN6P_AUTOFLOWLABEL); break; case IPV6_ORIGDSTADDR: OPTSET2(INP_ORIGDSTADDR, optval); break; case IPV6_BINDANY: OPTSET(INP_BINDANY); break; case IPV6_BINDMULTI: OPTSET2(INP_BINDMULTI, optval); break; #ifdef RSS case IPV6_RSS_LISTEN_BUCKET: if ((optval >= 0) && (optval < rss_getnumbuckets())) { in6p->inp_rss_listen_bucket = optval; OPTSET2(INP_RSS_BUCKET_SET, 1); } else { error = EINVAL; } break; #endif } break; case IPV6_TCLASS: case IPV6_DONTFRAG: case IPV6_USE_MIN_MTU: case IPV6_PREFER_TEMPADDR: if (optlen != sizeof(optval)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; { struct ip6_pktopts **optp; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, (u_char *)&optval, sizeof(optval), optp, (td != NULL) ? td->td_ucred : NULL, uproto); break; } case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_2292PKTINFO: OPTSET2292(IN6P_PKTINFO); break; case IPV6_2292HOPLIMIT: OPTSET2292(IN6P_HOPLIMIT); break; case IPV6_2292HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. */ if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_HOPOPTS); break; case IPV6_2292DSTOPTS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SETHDROPTS); if (error) return (error); } OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_2292RTHDR: OPTSET2292(IN6P_RTHDR); break; } break; case IPV6_PKTINFO: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_NEXTHOP: { /* new advanced API (RFC3542) */ u_char *optbuf; u_char optbuf_storage[MCLBYTES]; int optlen; struct ip6_pktopts **optp; /* cannot mix with RFC2292 */ if (OPTBIT(IN6P_RFC2292)) { error = EINVAL; break; } /* * We only ensure valsize is not too large * here. Further validation will be done * later. */ error = sooptcopyin(sopt, optbuf_storage, sizeof(optbuf_storage), 0); if (error) break; optlen = sopt->sopt_valsize; optbuf = optbuf_storage; optp = &in6p->in6p_outputopts; error = ip6_pcbopt(optname, optbuf, optlen, optp, (td != NULL) ? 
td->td_ucred : NULL, uproto); break; } #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: case IPV6_MSFILTER: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: error = ip6_setmoptions(in6p, sopt); break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; INP_WLOCK(in6p); switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags &= ~(INP_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->inp_flags &= ~(INP_LOWPORT); in6p->inp_flags |= INP_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->inp_flags &= ~(INP_HIGHPORT); in6p->inp_flags |= INP_LOWPORT; break; default: error = EINVAL; break; } INP_WUNLOCK(in6p); break; #if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: if (IPSEC_ENABLED(ipv6)) { error = IPSEC_PCBCTL(ipv6, in6p, sopt); break; } /* FALLTHROUGH */ #endif /* IPSEC */ default: error = ENOPROTOOPT; break; } break; case SOPT_GET: switch (optname) { case IPV6_2292PKTOPTIONS: #ifdef IPV6_PKTOPTIONS case IPV6_PKTOPTIONS: #endif /* * RFC3542 (effectively) deprecated the * semantics of the 2292-style pktoptions. * Since it was not reliable in nature (i.e., * applications had to expect the lack of some * information after all), it would make sense * to simplify this part by always returning * empty data. */ sopt->sopt_valsize = 0; break; case IPV6_RECVHOPOPTS: case IPV6_RECVDSTOPTS: case IPV6_RECVRTHDRDSTOPTS: case IPV6_UNICAST_HOPS: case IPV6_RECVPKTINFO: case IPV6_RECVHOPLIMIT: case IPV6_RECVRTHDR: case IPV6_RECVPATHMTU: case IPV6_V6ONLY: case IPV6_PORTRANGE: case IPV6_RECVTCLASS: case IPV6_AUTOFLOWLABEL: case IPV6_BINDANY: case IPV6_FLOWID: case IPV6_FLOWTYPE: case IPV6_RECVFLOWID: #ifdef RSS case IPV6_RSSBUCKETID: case IPV6_RECVRSSBUCKETID: #endif case IPV6_BINDMULTI: switch (optname) { case IPV6_RECVHOPOPTS: optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RECVDSTOPTS: optval = OPTBIT(IN6P_DSTOPTS); break; case IPV6_RECVRTHDRDSTOPTS: optval = OPTBIT(IN6P_RTHDRDSTOPTS); break; case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_RECVPKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_RECVHOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_RECVRTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_RECVPATHMTU: optval = OPTBIT(IN6P_MTU); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->inp_flags; if (flags & INP_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & INP_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } case IPV6_RECVTCLASS: optval = OPTBIT(IN6P_TCLASS); break; case IPV6_AUTOFLOWLABEL: optval = OPTBIT(IN6P_AUTOFLOWLABEL); break; case IPV6_ORIGDSTADDR: optval = OPTBIT2(INP_ORIGDSTADDR); break; case IPV6_BINDANY: optval = OPTBIT(INP_BINDANY); break; case IPV6_FLOWID: optval = in6p->inp_flowid; break; case IPV6_FLOWTYPE: optval = in6p->inp_flowtype; break; case IPV6_RECVFLOWID: optval = OPTBIT2(INP_RECVFLOWID); break; #ifdef RSS case IPV6_RSSBUCKETID: retval = rss_hash2bucket(in6p->inp_flowid, in6p->inp_flowtype, &rss_bucket); if (retval == 0) optval = rss_bucket; else error = EINVAL; break; case IPV6_RECVRSSBUCKETID: optval = OPTBIT2(INP_RECVRSSBUCKETID); break; #endif case IPV6_BINDMULTI: optval = OPTBIT2(INP_BINDMULTI); break; } if (error) break; error = sooptcopyout(sopt, 
                    &optval, sizeof optval);
                break;

            case IPV6_PATHMTU:
            {
                u_long pmtu = 0;
                struct ip6_mtuinfo mtuinfo;

                if (!(so->so_state & SS_ISCONNECTED))
                    return (ENOTCONN);
                /*
                 * XXX: we do not consider the case of source
                 * routing, or optional information to specify
                 * the outgoing interface.
                 */
                error = ip6_getpmtu_ctl(so->so_fibnum,
                    &in6p->in6p_faddr, &pmtu);
                if (error)
                    break;
                if (pmtu > IPV6_MAXPACKET)
                    pmtu = IPV6_MAXPACKET;

                bzero(&mtuinfo, sizeof(mtuinfo));
                mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
                optdata = (void *)&mtuinfo;
                optdatalen = sizeof(mtuinfo);
                error = sooptcopyout(sopt, optdata, optdatalen);
                break;
            }

            case IPV6_2292PKTINFO:
            case IPV6_2292HOPLIMIT:
            case IPV6_2292HOPOPTS:
            case IPV6_2292RTHDR:
            case IPV6_2292DSTOPTS:
                switch (optname) {
                case IPV6_2292PKTINFO:
                    optval = OPTBIT(IN6P_PKTINFO);
                    break;
                case IPV6_2292HOPLIMIT:
                    optval = OPTBIT(IN6P_HOPLIMIT);
                    break;
                case IPV6_2292HOPOPTS:
                    optval = OPTBIT(IN6P_HOPOPTS);
                    break;
                case IPV6_2292RTHDR:
                    optval = OPTBIT(IN6P_RTHDR);
                    break;
                case IPV6_2292DSTOPTS:
                    optval =
                        OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
                    break;
                }
                error = sooptcopyout(sopt, &optval,
                    sizeof optval);
                break;
            case IPV6_PKTINFO:
            case IPV6_HOPOPTS:
            case IPV6_RTHDR:
            case IPV6_DSTOPTS:
            case IPV6_RTHDRDSTOPTS:
            case IPV6_NEXTHOP:
            case IPV6_TCLASS:
            case IPV6_DONTFRAG:
            case IPV6_USE_MIN_MTU:
            case IPV6_PREFER_TEMPADDR:
                error = ip6_getpcbopt(in6p->in6p_outputopts,
                    optname, sopt);
                break;

            case IPV6_MULTICAST_IF:
            case IPV6_MULTICAST_HOPS:
            case IPV6_MULTICAST_LOOP:
            case IPV6_MSFILTER:
                error = ip6_getmoptions(in6p, sopt);
                break;

#if defined(IPSEC) || defined(IPSEC_SUPPORT)
            case IPV6_IPSEC_POLICY:
                if (IPSEC_ENABLED(ipv6)) {
                    error = IPSEC_PCBCTL(ipv6, in6p, sopt);
                    break;
                }
                /* FALLTHROUGH */
#endif /* IPSEC */
            default:
                error = ENOPROTOOPT;
                break;
            }
            break;
        }
    }
    return (error);
}

int
ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
{
    int error = 0, optval, optlen;
    const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
    struct inpcb *in6p = sotoinpcb(so);
    int level, op, optname;

    level = sopt->sopt_level;
    op = sopt->sopt_dir;
    optname = sopt->sopt_name;
    optlen = sopt->sopt_valsize;

    if (level != IPPROTO_IPV6) {
        return (EINVAL);
    }

    switch (optname) {
    case IPV6_CHECKSUM:
        /*
         * For ICMPv6 sockets, no modification of the checksum
         * offset is allowed; permit "no change" values to help
         * existing apps.
         *
         * RFC3542 says: "An attempt to set IPV6_CHECKSUM
         * for an ICMPv6 socket will fail."
         * The current behavior does not meet RFC3542.
         */
        switch (op) {
        case SOPT_SET:
            if (optlen != sizeof(int)) {
                error = EINVAL;
                break;
            }
            error = sooptcopyin(sopt, &optval, sizeof(optval),
                sizeof(optval));
            if (error)
                break;
            if ((optval % 2) != 0) {
                /* the API assumes even offset values */
                error = EINVAL;
            } else if (so->so_proto->pr_protocol ==
                IPPROTO_ICMPV6) {
                if (optval != icmp6off)
                    error = EINVAL;
            } else
                in6p->in6p_cksum = optval;
            break;

        case SOPT_GET:
            if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
                optval = icmp6off;
            else
                optval = in6p->in6p_cksum;

            error = sooptcopyout(sopt, &optval, sizeof(optval));
            break;

        default:
            error = EINVAL;
            break;
        }
        break;

    default:
        error = ENOPROTOOPT;
        break;
    }

    return (error);
}

/*
 * Set up IP6 options in pcb for insertion in output packets or
 * specifying behavior of outgoing packets.
 */
static int
ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
    struct socket *so, struct sockopt *sopt)
{
    struct ip6_pktopts *opt = *pktopt;
    int error = 0;
    struct thread *td = sopt->sopt_td;

    /* turn off any old options.
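     * (A NULL or zero-length mbuf only clears previously installed
     * options; otherwise the user-supplied options replace them
     * wholesale.)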
*/ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return (0); } /* set options specified by user. */ if ((error = ip6_setpktopts(m, opt, NULL, (td != NULL) ? td->td_ucred : NULL, so->so_proto->pr_protocol)) != 0) { ip6_clearpktopts(opt, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return (error); } *pktopt = opt; return (0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. */ void ip6_initpktopts(struct ip6_pktopts *opt) { bzero(opt, sizeof(*opt)); opt->ip6po_hlim = -1; /* -1 means default hop limit */ opt->ip6po_tclass = -1; /* -1 means default traffic class */ opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY; opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM; } static int ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt, struct ucred *cred, int uproto) { struct ip6_pktopts *opt; if (*pktopt == NULL) { *pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT, M_WAITOK); ip6_initpktopts(*pktopt); } opt = *pktopt; return (ip6_setpktopt(optname, buf, len, opt, cred, 1, 0, uproto)); } static int ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt) { void *optdata = NULL; int optdatalen = 0; struct ip6_ext *ip6e; int error = 0; struct in6_pktinfo null_pktinfo; int deftclass = 0, on; int defminmtu = IP6PO_MINMTU_MCASTONLY; int defpreftemp = IP6PO_TEMPADDR_SYSTEM; switch (optname) { case IPV6_PKTINFO: optdata = (void *)&null_pktinfo; if (pktopt && pktopt->ip6po_pktinfo) { bcopy(pktopt->ip6po_pktinfo, &null_pktinfo, sizeof(null_pktinfo)); in6_clearscope(&null_pktinfo.ipi6_addr); } else { /* XXX: we don't have to do this every time... 
*/ bzero(&null_pktinfo, sizeof(null_pktinfo)); } optdatalen = sizeof(struct in6_pktinfo); break; case IPV6_TCLASS: if (pktopt && pktopt->ip6po_tclass >= 0) optdata = (void *)&pktopt->ip6po_tclass; else optdata = (void *)&deftclass; optdatalen = sizeof(int); break; case IPV6_HOPOPTS: if (pktopt && pktopt->ip6po_hbh) { optdata = (void *)pktopt->ip6po_hbh; ip6e = (struct ip6_ext *)pktopt->ip6po_hbh; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDR: if (pktopt && pktopt->ip6po_rthdr) { optdata = (void *)pktopt->ip6po_rthdr; ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_RTHDRDSTOPTS: if (pktopt && pktopt->ip6po_dest1) { optdata = (void *)pktopt->ip6po_dest1; ip6e = (struct ip6_ext *)pktopt->ip6po_dest1; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_DSTOPTS: if (pktopt && pktopt->ip6po_dest2) { optdata = (void *)pktopt->ip6po_dest2; ip6e = (struct ip6_ext *)pktopt->ip6po_dest2; optdatalen = (ip6e->ip6e_len + 1) << 3; } break; case IPV6_NEXTHOP: if (pktopt && pktopt->ip6po_nexthop) { optdata = (void *)pktopt->ip6po_nexthop; optdatalen = pktopt->ip6po_nexthop->sa_len; } break; case IPV6_USE_MIN_MTU: if (pktopt) optdata = (void *)&pktopt->ip6po_minmtu; else optdata = (void *)&defminmtu; optdatalen = sizeof(int); break; case IPV6_DONTFRAG: if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG)) on = 1; else on = 0; optdata = (void *)&on; optdatalen = sizeof(on); break; case IPV6_PREFER_TEMPADDR: if (pktopt) optdata = (void *)&pktopt->ip6po_prefer_tempaddr; else optdata = (void *)&defpreftemp; optdatalen = sizeof(int); break; default: /* should not happen */ #ifdef DIAGNOSTIC panic("ip6_getpcbopt: unexpected option\n"); #endif return (ENOPROTOOPT); } error = sooptcopyout(sopt, optdata, optdatalen); return (error); } void ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) { if (pktopt == NULL) return; if (optname == -1 || optname == IPV6_PKTINFO) { if (pktopt->ip6po_pktinfo) free(pktopt->ip6po_pktinfo, M_IP6OPT); pktopt->ip6po_pktinfo = NULL; } if (optname == -1 || optname == IPV6_HOPLIMIT) pktopt->ip6po_hlim = -1; if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { if (pktopt->ip6po_nextroute.ro_rt) { RTFREE(pktopt->ip6po_nextroute.ro_rt); pktopt->ip6po_nextroute.ro_rt = NULL; } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); pktopt->ip6po_nexthop = NULL; } if (optname == -1 || optname == IPV6_HOPOPTS) { if (pktopt->ip6po_hbh) free(pktopt->ip6po_hbh, M_IP6OPT); pktopt->ip6po_hbh = NULL; } if (optname == -1 || optname == IPV6_RTHDRDSTOPTS) { if (pktopt->ip6po_dest1) free(pktopt->ip6po_dest1, M_IP6OPT); pktopt->ip6po_dest1 = NULL; } if (optname == -1 || optname == IPV6_RTHDR) { if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; if (pktopt->ip6po_route.ro_rt) { RTFREE(pktopt->ip6po_route.ro_rt); pktopt->ip6po_route.ro_rt = NULL; } } if (optname == -1 || optname == IPV6_DSTOPTS) { if (pktopt->ip6po_dest2) free(pktopt->ip6po_dest2, M_IP6OPT); pktopt->ip6po_dest2 = NULL; } } #define PKTOPT_EXTHDRCPY(type) \ do {\ if (src->type) {\ int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ - if (dst->type == NULL && canwait == M_NOWAIT)\ + if (dst->type == NULL)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ } while (/*CONSTCOND*/ 0) static int copypktopts(struct ip6_pktopts *dst, struct 
ip6_pktopts *src, int canwait)
{
    if (dst == NULL || src == NULL)  {
        printf("copypktopts: invalid argument\n");
        return (EINVAL);
    }

    dst->ip6po_hlim = src->ip6po_hlim;
    dst->ip6po_tclass = src->ip6po_tclass;
    dst->ip6po_flags = src->ip6po_flags;
    dst->ip6po_minmtu = src->ip6po_minmtu;
    dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;
    if (src->ip6po_pktinfo) {
        dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
            M_IP6OPT, canwait);
        if (dst->ip6po_pktinfo == NULL)
            goto bad;
        *dst->ip6po_pktinfo = *src->ip6po_pktinfo;
    }
    if (src->ip6po_nexthop) {
        dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
            M_IP6OPT, canwait);
        if (dst->ip6po_nexthop == NULL)
            goto bad;
        bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
            src->ip6po_nexthop->sa_len);
    }
    PKTOPT_EXTHDRCPY(ip6po_hbh);
    PKTOPT_EXTHDRCPY(ip6po_dest1);
    PKTOPT_EXTHDRCPY(ip6po_dest2);
    PKTOPT_EXTHDRCPY(ip6po_rthdr); /* do not copy the cached route */
    return (0);

  bad:
    ip6_clearpktopts(dst, -1);
    return (ENOBUFS);
}
#undef PKTOPT_EXTHDRCPY

struct ip6_pktopts *
ip6_copypktopts(struct ip6_pktopts *src, int canwait)
{
    int error;
    struct ip6_pktopts *dst;

    dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
    if (dst == NULL)
        return (NULL);
    ip6_initpktopts(dst);

    if ((error = copypktopts(dst, src, canwait)) != 0) {
        free(dst, M_IP6OPT);
        return (NULL);
    }

    return (dst);
}

void
ip6_freepcbopts(struct ip6_pktopts *pktopt)
{
    if (pktopt == NULL)
        return;

    ip6_clearpktopts(pktopt, -1);

    free(pktopt, M_IP6OPT);
}

/*
 * Set IPv6 outgoing packet options based on advanced API.
 */
int
ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
    struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
{
    struct cmsghdr *cm = NULL;

    if (control == NULL || opt == NULL)
        return (EINVAL);

    ip6_initpktopts(opt);
    if (stickyopt) {
        int error;

        /*
         * If stickyopt is provided, make a local copy of the options
         * for this particular packet, then override them with
         * ancillary objects.
         * XXX: copypktopts() does not copy the cached route to a next
         * hop (if any).  This is not very good in terms of efficiency,
         * but we can allow this since this option should be rarely
         * used.
         */
        if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
            return (error);
    }

    /*
     * XXX: Currently, we assume all the optional information is stored
     * in a single mbuf.
     */
    if (control->m_next)
        return (EINVAL);

    for (; control->m_len > 0;
        control->m_data += CMSG_ALIGN(cm->cmsg_len),
        control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
        int error;

        if (control->m_len < CMSG_LEN(0))
            return (EINVAL);

        cm = mtod(control, struct cmsghdr *);
        if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
            return (EINVAL);
        if (cm->cmsg_level != IPPROTO_IPV6)
            continue;

        error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
            cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
        if (error)
            return (error);
    }

    return (0);
}

/*
 * Set a particular packet option, as a sticky option or an ancillary data
 * item.  "len" can be 0 only when it's a sticky option.
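 * (For the extension-header options a zero length clears the sticky
 * option via ip6_clearpktopts(), as handled per-option below.)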
* We have 4 cases of combination of "sticky" and "cmsg": * "sticky=0, cmsg=0": impossible * "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data * "sticky=1, cmsg=0": RFC3542 socket option * "sticky=1, cmsg=1": RFC2292 socket option */ static int ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt, struct ucred *cred, int sticky, int cmsg, int uproto) { int minmtupolicy, preftemp; int error; if (!sticky && !cmsg) { #ifdef DIAGNOSTIC printf("ip6_setpktopt: impossible case\n"); #endif return (EINVAL); } /* * IPV6_2292xxx is for backward compatibility to RFC2292, and should * not be specified in the context of RFC3542. Conversely, * RFC3542 types should not be specified in the context of RFC2292. */ if (!cmsg) { switch (optname) { case IPV6_2292PKTINFO: case IPV6_2292HOPLIMIT: case IPV6_2292NEXTHOP: case IPV6_2292HOPOPTS: case IPV6_2292DSTOPTS: case IPV6_2292RTHDR: case IPV6_2292PKTOPTIONS: return (ENOPROTOOPT); } } if (sticky && cmsg) { switch (optname) { case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_NEXTHOP: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_USE_MIN_MTU: case IPV6_DONTFRAG: case IPV6_TCLASS: case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */ return (ENOPROTOOPT); } } switch (optname) { case IPV6_2292PKTINFO: case IPV6_PKTINFO: { struct ifnet *ifp = NULL; struct in6_pktinfo *pktinfo; if (len != sizeof(struct in6_pktinfo)) return (EINVAL); pktinfo = (struct in6_pktinfo *)buf; /* * An application can clear any sticky IPV6_PKTINFO option by * doing a "regular" setsockopt with ipi6_addr being * in6addr_any and ipi6_ifindex being zero. * [RFC 3542, Section 6] */ if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo && pktinfo->ipi6_ifindex == 0 && IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { ip6_clearpktopts(opt, optname); break; } if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO && sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { return (EINVAL); } if (IN6_IS_ADDR_MULTICAST(&pktinfo->ipi6_addr)) return (EINVAL); /* validate the interface index if specified. */ if (pktinfo->ipi6_ifindex > V_if_index) return (ENXIO); if (pktinfo->ipi6_ifindex) { ifp = ifnet_byindex(pktinfo->ipi6_ifindex); if (ifp == NULL) return (ENXIO); } if (ifp != NULL && (ifp->if_afdata[AF_INET6] == NULL || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)) return (ENETDOWN); if (ifp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) { struct in6_ifaddr *ia; in6_setscope(&pktinfo->ipi6_addr, ifp, NULL); ia = in6ifa_ifpwithaddr(ifp, &pktinfo->ipi6_addr); if (ia == NULL) return (EADDRNOTAVAIL); ifa_free(&ia->ia_ifa); } /* * We store the address anyway, and let in6_selectsrc() * validate the specified address. This is because ipi6_addr * may not have enough information about its scope zone, and * we may need additional information (such as outgoing * interface or the scope zone of a destination address) to * disambiguate the scope. * XXX: the delay of the validation may confuse the * application when it is used as a sticky option. */ if (opt->ip6po_pktinfo == NULL) { opt->ip6po_pktinfo = malloc(sizeof(*pktinfo), M_IP6OPT, M_NOWAIT); if (opt->ip6po_pktinfo == NULL) return (ENOBUFS); } bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo)); break; } case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: { int *hlimp; /* * RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT * to simplify the ordering among hoplimit options. 
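         * (A sticky hop limit can instead be set with the
         * IPV6_UNICAST_HOPS socket option.)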
         */
        if (optname == IPV6_HOPLIMIT && sticky)
            return (ENOPROTOOPT);

        if (len != sizeof(int))
            return (EINVAL);
        hlimp = (int *)buf;
        if (*hlimp < -1 || *hlimp > 255)
            return (EINVAL);

        opt->ip6po_hlim = *hlimp;
        break;
    }

    case IPV6_TCLASS:
    {
        int tclass;

        if (len != sizeof(int))
            return (EINVAL);
        tclass = *(int *)buf;
        if (tclass < -1 || tclass > 255)
            return (EINVAL);

        opt->ip6po_tclass = tclass;
        break;
    }

    case IPV6_2292NEXTHOP:
    case IPV6_NEXTHOP:
        if (cred != NULL) {
            error = priv_check_cred(cred,
                PRIV_NETINET_SETHDROPTS, 0);
            if (error)
                return (error);
        }

        if (len == 0) { /* just remove the option */
            ip6_clearpktopts(opt, IPV6_NEXTHOP);
            break;
        }

        /* check if cmsg_len is large enough for sa_len */
        if (len < sizeof(struct sockaddr) || len < *buf)
            return (EINVAL);

        switch (((struct sockaddr *)buf)->sa_family) {
        case AF_INET6:
        {
            struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
            int error;

            if (sa6->sin6_len != sizeof(struct sockaddr_in6))
                return (EINVAL);

            if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
                IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
                return (EINVAL);
            }
            if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
                != 0) {
                return (error);
            }
            break;
        }
        case AF_LINK:   /* should eventually be supported */
        default:
            return (EAFNOSUPPORT);
        }

        /* turn off the previous option, then set the new option. */
        ip6_clearpktopts(opt, IPV6_NEXTHOP);
        opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
        if (opt->ip6po_nexthop == NULL)
            return (ENOBUFS);
        bcopy(buf, opt->ip6po_nexthop, *buf);

        break;

    case IPV6_2292HOPOPTS:
    case IPV6_HOPOPTS:
    {
        struct ip6_hbh *hbh;
        int hbhlen;

        /*
         * XXX: We don't allow a non-privileged user to set ANY HbH
         * options, since per-option restriction has too much
         * overhead.
         */
        if (cred != NULL) {
            error = priv_check_cred(cred,
                PRIV_NETINET_SETHDROPTS, 0);
            if (error)
                return (error);
        }

        if (len == 0) {
            ip6_clearpktopts(opt, IPV6_HOPOPTS);
            break;  /* just remove the option */
        }

        /* message length validation */
        if (len < sizeof(struct ip6_hbh))
            return (EINVAL);
        hbh = (struct ip6_hbh *)buf;
        hbhlen = (hbh->ip6h_len + 1) << 3;
        if (len != hbhlen)
            return (EINVAL);

        /* turn off the previous option, then set the new option. */
        ip6_clearpktopts(opt, IPV6_HOPOPTS);
        opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
        if (opt->ip6po_hbh == NULL)
            return (ENOBUFS);
        bcopy(hbh, opt->ip6po_hbh, hbhlen);

        break;
    }

    case IPV6_2292DSTOPTS:
    case IPV6_DSTOPTS:
    case IPV6_RTHDRDSTOPTS:
    {
        struct ip6_dest *dest, **newdest = NULL;
        int destlen;

        if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
            error = priv_check_cred(cred,
                PRIV_NETINET_SETHDROPTS, 0);
            if (error)
                return (error);
        }

        if (len == 0) {
            ip6_clearpktopts(opt, optname);
            break;  /* just remove the option */
        }

        /* message length validation */
        if (len < sizeof(struct ip6_dest))
            return (EINVAL);
        dest = (struct ip6_dest *)buf;
        destlen = (dest->ip6d_len + 1) << 3;
        if (len != destlen)
            return (EINVAL);

        /*
         * Determine the position where the destination options
         * header should be inserted: before or after the routing
         * header.
         */
        switch (optname) {
        case IPV6_2292DSTOPTS:
            /*
             * The old advanced API is ambiguous on this point.
             * Our approach is to determine the position
             * according to the existence of a routing header.
             * Note, however, that this depends on the order of the
             * extension headers in the ancillary data; the 1st
             * part of the destination options header must appear
             * before the routing header in the ancillary data,
             * too.
             * RFC3542 solved the ambiguity by introducing
             * separate ancillary data or option types.
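             * (IPV6_RTHDRDSTOPTS unconditionally selects dest1 and
             * IPV6_DSTOPTS unconditionally selects dest2 in the
             * switch below.)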
*/ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; break; case IPV6_RTHDRDSTOPTS: newdest = &opt->ip6po_dest1; break; case IPV6_DSTOPTS: newdest = &opt->ip6po_dest2; break; } /* turn off the previous option, then set the new option. */ ip6_clearpktopts(opt, optname); *newdest = malloc(destlen, M_IP6OPT, M_NOWAIT); if (*newdest == NULL) return (ENOBUFS); bcopy(dest, *newdest, destlen); break; } case IPV6_2292RTHDR: case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (len == 0) { ip6_clearpktopts(opt, IPV6_RTHDR); break; /* just remove the option */ } /* message length validation */ if (len < sizeof(struct ip6_rthdr)) return (EINVAL); rth = (struct ip6_rthdr *)buf; rthlen = (rth->ip6r_len + 1) << 3; if (len != rthlen) return (EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: if (rth->ip6r_len == 0) /* must contain one addr */ return (EINVAL); if (rth->ip6r_len % 2) /* length must be even */ return (EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return (EINVAL); break; default: return (EINVAL); /* not supported */ } /* turn off the previous option */ ip6_clearpktopts(opt, IPV6_RTHDR); opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT); if (opt->ip6po_rthdr == NULL) return (ENOBUFS); bcopy(rth, opt->ip6po_rthdr, rthlen); break; } case IPV6_USE_MIN_MTU: if (len != sizeof(int)) return (EINVAL); minmtupolicy = *(int *)buf; if (minmtupolicy != IP6PO_MINMTU_MCASTONLY && minmtupolicy != IP6PO_MINMTU_DISABLE && minmtupolicy != IP6PO_MINMTU_ALL) { return (EINVAL); } opt->ip6po_minmtu = minmtupolicy; break; case IPV6_DONTFRAG: if (len != sizeof(int)) return (EINVAL); if (uproto == IPPROTO_TCP || *(int *)buf == 0) { /* * we ignore this option for TCP sockets. * (RFC3542 leaves this case unspecified.) */ opt->ip6po_flags &= ~IP6PO_DONTFRAG; } else opt->ip6po_flags |= IP6PO_DONTFRAG; break; case IPV6_PREFER_TEMPADDR: if (len != sizeof(int)) return (EINVAL); preftemp = *(int *)buf; if (preftemp != IP6PO_TEMPADDR_SYSTEM && preftemp != IP6PO_TEMPADDR_NOTPREFER && preftemp != IP6PO_TEMPADDR_PREFER) { return (EINVAL); } opt->ip6po_prefer_tempaddr = preftemp; break; default: return (ENOPROTOOPT); } /* end of switch */ return (0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(struct ifnet *ifp, struct mbuf *m) { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. */ if (!M_WRITABLE(copym) || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } ip6 = mtod(copym, struct ip6_hdr *); /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { copym->m_pkthdr.csum_flags |= CSUM_DATA_VALID_IPV6 | CSUM_PSEUDO_HDR; copym->m_pkthdr.csum_data = 0xffff; } if_simloop(ifp, copym, AF_INET6, 0); } /* * Chop IPv6 header off from the payload. 
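 * A separate header mbuf is prepended and the fixed IPv6 header is copied
 * into it, so that the caller can insert extension headers between the
 * IPv6 header and the payload without moving payload data.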
*/ static int ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs) { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { mh = m_gethdr(M_NOWAIT, MT_DATA); if (mh == NULL) { m_freem(m); return ENOBUFS; } m_move_pkthdr(mh, m); M_ALIGN(mh, sizeof(*ip6)); m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(struct inpcb *in6p) { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } Index: projects/clang500-import/sys/netinet6/udp6_usrreq.c =================================================================== --- projects/clang500-import/sys/netinet6/udp6_usrreq.c (revision 319250) +++ projects/clang500-import/sys/netinet6/udp6_usrreq.c (revision 319251) @@ -1,1324 +1,1320 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2010-2011 Juniper Networks, Inc. * Copyright (c) 2014 Kevin Lo * All rights reserved. * * Portions of this software were developed by Robert N. M. Watson under * contract to Juniper Networks, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: udp6_usrreq.c,v 1.27 2001/05/21 05:45:10 jinmei Exp $ * $KAME: udp6_output.c,v 1.31 2001/05/21 16:39:15 jinmei Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * UDP protocol implementation. * Per RFC 768, August, 1980. */ extern struct protosw inetsw[]; static void udp6_detach(struct socket *so); static int udp6_append(struct inpcb *inp, struct mbuf *n, int off, struct sockaddr_in6 *fromsa) { struct socket *so; struct mbuf *opts = NULL, *tmp_opts; struct udpcb *up; INP_LOCK_ASSERT(inp); /* * Engage the tunneling protocol. */ up = intoudpcb(inp); if (up->u_tun_func != NULL) { in_pcbref(inp); INP_RUNLOCK(inp); (*up->u_tun_func)(n, off, inp, (struct sockaddr *)&fromsa[0], up->u_tun_ctx); INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); } #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* Check AH/ESP integrity. 
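     * (Datagrams that fail the inbound IPsec policy check are dropped
     * silently; 0 is returned just as for a successful delivery.)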
*/ if (IPSEC_ENABLED(ipv6)) { if (IPSEC_CHECK_POLICY(ipv6, n, inp) != 0) { m_freem(n); return (0); } } #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { m_freem(n); return (0); } #endif opts = NULL; if (inp->inp_flags & INP_CONTROLOPTS || inp->inp_socket->so_options & SO_TIMESTAMP) ip6_savecontrol(inp, n, &opts); if ((inp->inp_vflag & INP_IPV6) && (inp->inp_flags2 & INP_ORIGDSTADDR)) { tmp_opts = sbcreatecontrol((caddr_t)&fromsa[1], sizeof(struct sockaddr_in6), IPV6_ORIGDSTADDR, IPPROTO_IPV6); if (tmp_opts) { if (opts) { tmp_opts->m_next = opts; opts = tmp_opts; } else opts = tmp_opts; } } m_adj(n, off + sizeof(struct udphdr)); so = inp->inp_socket; SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)&fromsa[0], n, opts) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); m_freem(n); if (opts) m_freem(opts); UDPSTAT_INC(udps_fullsock); } else sorwakeup_locked(so); return (0); } int udp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ifnet *ifp; struct ip6_hdr *ip6; struct udphdr *uh; struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; int off = *offp; int cscov_partial; int plen, ulen; struct sockaddr_in6 fromsa[2]; struct m_tag *fwd_tag; uint16_t uh_sum; uint8_t nxt; ifp = m->m_pkthdr.rcvif; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct udphdr), IPPROTO_DONE); ip6 = mtod(m, struct ip6_hdr *); uh = (struct udphdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(uh, struct udphdr *, m, off, sizeof(*uh)); if (!uh) return (IPPROTO_DONE); #endif UDPSTAT_INC(udps_ipackets); /* * Destination port of 0 is illegal, based on RFC768. */ if (uh->uh_dport == 0) goto badunlocked; plen = ntohs(ip6->ip6_plen) - off + sizeof(*ip6); ulen = ntohs((u_short)uh->uh_ulen); nxt = proto; cscov_partial = (nxt == IPPROTO_UDPLITE) ? 1 : 0; if (nxt == IPPROTO_UDPLITE) { /* Zero means checksum over the complete packet. */ if (ulen == 0) ulen = plen; if (ulen == plen) cscov_partial = 0; if ((ulen < sizeof(struct udphdr)) || (ulen > plen)) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } if (uh->uh_sum == 0) { /* XXX: What is the right UDPLite MIB counter? */ goto badunlocked; } } else { if ((ulen < sizeof(struct udphdr)) || (plen != ulen)) { UDPSTAT_INC(udps_badlen); goto badunlocked; } if (uh->uh_sum == 0) { UDPSTAT_INC(udps_nosum); goto badunlocked; } } if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) && !cscov_partial) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) uh_sum = m->m_pkthdr.csum_data; else uh_sum = in6_cksum_pseudo(ip6, ulen, nxt, m->m_pkthdr.csum_data); uh_sum ^= 0xffff; } else uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen); if (uh_sum != 0) { UDPSTAT_INC(udps_badsum); goto badunlocked; } /* * Construct sockaddr format source address. */ init_sin6(&fromsa[0], m, 0); fromsa[0].sin6_port = uh->uh_sport; init_sin6(&fromsa[1], m, 1); fromsa[1].sin6_port = uh->uh_dport; pcbinfo = udp_get_inpcbinfo(nxt); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct inpcb *last; struct inpcbhead *pcblist; struct ip6_moptions *imo; INP_INFO_RLOCK(pcbinfo); /* * In the event that laddr should be set to the link-local * address (this happens in RIPng), the multicast address * specified in the received packet will not match laddr. To * handle this situation, matching is relaxed if the * receiving interface is the same as one specified in the * socket and if the destination multicast address matches * one of the multicast groups specified in the socket. 
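         * (im6o_mc_filter() below additionally applies any RFC 3678
         * source-filter state before delivery.)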
         */

        /*
         * KAME note: traditionally we dropped udpiphdr from mbuf
         * here.  We need udphdr for IPsec processing so we do that
         * later.
         */
        pcblist = udp_get_pcblist(nxt);
        last = NULL;
        LIST_FOREACH(inp, pcblist, inp_list) {
            if ((inp->inp_vflag & INP_IPV6) == 0)
                continue;
            if (inp->inp_lport != uh->uh_dport)
                continue;
            if (inp->inp_fport != 0 &&
                inp->inp_fport != uh->uh_sport)
                continue;
            if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
                if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr,
                    &ip6->ip6_dst))
                    continue;
            }
            if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
                if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr,
                    &ip6->ip6_src) ||
                    inp->inp_fport != uh->uh_sport)
                    continue;
            }

            /*
             * XXXRW: Because we weren't holding either the inpcb
             * or the hash lock when we checked for a match
             * before, we should probably recheck now that the
             * inpcb lock is (supposed to be) held.
             */

            /*
             * Handle socket delivery policy for any-source
             * and source-specific multicast. [RFC3678]
             */
            imo = inp->in6p_moptions;
            if (imo && IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
                struct sockaddr_in6 mcaddr;
                int blocked;

                INP_RLOCK(inp);

                bzero(&mcaddr, sizeof(struct sockaddr_in6));
                mcaddr.sin6_len = sizeof(struct sockaddr_in6);
                mcaddr.sin6_family = AF_INET6;
                mcaddr.sin6_addr = ip6->ip6_dst;

                blocked = im6o_mc_filter(imo, ifp,
                    (struct sockaddr *)&mcaddr,
                    (struct sockaddr *)&fromsa[0]);
                if (blocked != MCAST_PASS) {
                    if (blocked == MCAST_NOTGMEMBER)
                        IP6STAT_INC(ip6s_notmember);
                    if (blocked == MCAST_NOTSMEMBER ||
                        blocked == MCAST_MUTED)
                        UDPSTAT_INC(udps_filtermcast);
                    INP_RUNLOCK(inp); /* XXX */
                    continue;
                }

                INP_RUNLOCK(inp);
            }
            if (last != NULL) {
                struct mbuf *n;

                if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) !=
                    NULL) {
                    INP_RLOCK(last);
                    UDP_PROBE(receive, NULL, last, ip6,
                        last, uh);
                    if (udp6_append(last, n, off, fromsa))
                        goto inp_lost;
                    INP_RUNLOCK(last);
                }
            }
            last = inp;
            /*
             * Don't look for additional matches if this one does
             * not have either the SO_REUSEPORT or SO_REUSEADDR
             * socket options set.  This heuristic avoids
             * searching through all pcbs in the common case of a
             * non-shared port.  It assumes that an application
             * will never clear these options after setting them.
             */
            if ((last->inp_socket->so_options &
                (SO_REUSEPORT|SO_REUSEADDR)) == 0)
                break;
        }

        if (last == NULL) {
            /*
             * No matching pcb found; discard datagram.  (No need
             * to send an ICMP Port Unreachable for a broadcast
             * or multicast datagram.)
             */
            UDPSTAT_INC(udps_noport);
            UDPSTAT_INC(udps_noportmcast);
            goto badheadlocked;
        }
        INP_RLOCK(last);
        INP_INFO_RUNLOCK(pcbinfo);
        UDP_PROBE(receive, NULL, last, ip6, last, uh);
        if (udp6_append(last, m, off, fromsa) == 0)
            INP_RUNLOCK(last);
    inp_lost:
        return (IPPROTO_DONE);
    }

    /*
     * Locate pcb for datagram.
     */

    /*
     * Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
     */
    if ((m->m_flags & M_IP6_NEXTHOP) &&
        (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) {
        struct sockaddr_in6 *next_hop6;

        next_hop6 = (struct sockaddr_in6 *)(fwd_tag + 1);

        /*
         * Transparently forwarded.  Pretend to be the destination.
         * Already got one like this?
         */
        inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport,
            &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_RLOCKPCB,
            m->m_pkthdr.rcvif, m);
        if (!inp) {
            /*
             * It's new.  Try to find the ambushing socket.
             * Because we've rewritten the destination address,
             * any hardware-generated hash is ignored.
             */
            inp = in6_pcblookup(pcbinfo, &ip6->ip6_src,
                uh->uh_sport, &next_hop6->sin6_addr,
                next_hop6->sin6_port ?
                htons(next_hop6->sin6_port) :
                uh->uh_dport, INPLOOKUP_WILDCARD |
                INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif);
        }
        /* Remove the tag from the packet.
We don't need it anymore. */ m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP6_NEXTHOP; } else inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_src, uh->uh_sport, &ip6->ip6_dst, uh->uh_dport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp == NULL) { if (udp_log_in_vain) { char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; log(LOG_INFO, "Connection attempt to UDP [%s]:%d from [%s]:%d\n", ip6_sprintf(ip6bufd, &ip6->ip6_dst), ntohs(uh->uh_dport), ip6_sprintf(ip6bufs, &ip6->ip6_src), ntohs(uh->uh_sport)); } UDPSTAT_INC(udps_noport); if (m->m_flags & M_MCAST) { printf("UDP6: M_MCAST is set in a unicast packet.\n"); UDPSTAT_INC(udps_noportmcast); goto badunlocked; } if (V_udp_blackhole) - goto badunlocked; - if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0) goto badunlocked; icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return (IPPROTO_DONE); } INP_RLOCK_ASSERT(inp); up = intoudpcb(inp); if (cscov_partial) { if (up->u_rxcslen == 0 || up->u_rxcslen > ulen) { INP_RUNLOCK(inp); m_freem(m); return (IPPROTO_DONE); } } UDP_PROBE(receive, NULL, inp, ip6, inp, uh); if (udp6_append(inp, m, off, fromsa) == 0) INP_RUNLOCK(inp); return (IPPROTO_DONE); badheadlocked: INP_INFO_RUNLOCK(pcbinfo); badunlocked: if (m) m_freem(m); return (IPPROTO_DONE); } static void udp6_common_ctlinput(int cmd, struct sockaddr *sa, void *d, struct inpcbinfo *pcbinfo) { struct udphdr uh; struct ip6_hdr *ip6; struct mbuf *m; int off = 0; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; void *cmdarg; struct inpcb *(*notify)(struct inpcb *, int) = udp_notify; struct udp_portonly { u_int16_t uh_sport; u_int16_t uh_dport; } *uhp; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; if (PRC_IS_REDIRECT(cmd)) notify = in6_rtchange, d = NULL; else if (cmd == PRC_HOSTDEAD) d = NULL; else if (inet6ctlerrmap[cmd] == 0) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; cmdarg = ip6cp->ip6c_cmdarg; sa6_src = ip6cp->ip6c_src; } else { m = NULL; ip6 = NULL; cmdarg = NULL; sa6_src = &sa6_any; } if (ip6) { /* * XXX: We assume that when IPV6 is non NULL, * M and OFF are valid. */ /* Check if we can safely examine src and dst ports. */ if (m->m_pkthdr.len < off + sizeof(*uhp)) return; bzero(&uh, sizeof(uh)); m_copydata(m, off, sizeof(*uhp), (caddr_t)&uh); if (!PRC_IS_REDIRECT(cmd)) { /* Check to see if its tunneled */ struct inpcb *inp; inp = in6_pcblookup_mbuf(pcbinfo, &ip6->ip6_dst, uh.uh_dport, &ip6->ip6_src, uh.uh_sport, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, m->m_pkthdr.rcvif, m); if (inp != NULL) { struct udpcb *up; up = intoudpcb(inp); if (up->u_icmp_func) { /* Yes it is. */ INP_RUNLOCK(inp); (*up->u_icmp_func)(cmd, (struct sockaddr *)ip6cp->ip6c_src, d, up->u_tun_ctx); return; } else { /* Can't find it. 
*/ INP_RUNLOCK(inp); } } } (void)in6_pcbnotify(pcbinfo, sa, uh.uh_dport, (struct sockaddr *)ip6cp->ip6c_src, uh.uh_sport, cmd, cmdarg, notify); } else (void)in6_pcbnotify(pcbinfo, sa, 0, (const struct sockaddr *)sa6_src, 0, cmd, cmdarg, notify); } void udp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_udbinfo)); } void udplite6_ctlinput(int cmd, struct sockaddr *sa, void *d) { return (udp6_common_ctlinput(cmd, sa, d, &V_ulitecbinfo)); } static int udp6_getcred(SYSCTL_HANDLER_ARGS) { struct xucred xuc; struct sockaddr_in6 addrs[2]; struct inpcb *inp; int error; error = priv_check(req->td, PRIV_NETINET_GETCRED); if (error) return (error); if (req->newlen != sizeof(addrs)) return (EINVAL); if (req->oldlen != sizeof(struct xucred)) return (EINVAL); error = SYSCTL_IN(req, addrs, sizeof(addrs)); if (error) return (error); if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 || (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) { return (error); } inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr, addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port, INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); if (inp != NULL) { INP_RLOCK_ASSERT(inp); if (inp->inp_socket == NULL) error = ENOENT; if (error == 0) error = cr_canseesocket(req->td->td_ucred, inp->inp_socket); if (error == 0) cru2x(inp->inp_cred, &xuc); INP_RUNLOCK(inp); } else error = ENOENT; if (error == 0) error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred)); return (error); } SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0, 0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection"); static int udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, struct mbuf *control, struct thread *td) { u_int32_t ulen = m->m_pkthdr.len; u_int32_t plen = sizeof(struct udphdr) + ulen; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; struct sockaddr_in6 *sin6 = NULL; int cscov_partial = 0; int scope_ambiguous = 0; u_short fport; int error = 0; uint8_t nxt; uint16_t cscov = 0; struct ip6_pktopts *optp, opt; int af = AF_INET6, hlen = sizeof(struct ip6_hdr); int flags; struct sockaddr_in6 tmp; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); if (addr6) { /* addr6 has been validated in udp6_send(). */ sin6 = (struct sockaddr_in6 *)addr6; /* protect *sin6 from overwrites */ tmp = *sin6; sin6 = &tmp; /* * Application should provide a proper zone ID or the use of * default zone IDs should be enabled. Unfortunately, some * applications do not behave as it should, so we need a * workaround. Even if an appropriate ID is not determined, * we'll see if we can determine the outgoing interface. If we * can, determine the zone ID based on the interface below. */ if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return (error); } nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; if (control) { if ((error = ip6_setpktopts(control, &opt, inp->in6p_outputopts, td->td_ucred, nxt)) != 0) goto release; optp = &opt; } else optp = inp->in6p_outputopts; if (sin6) { faddr = &sin6->sin6_addr; /* * Since we saw no essential reason for calling in_pcbconnect, * we get rid of such kind of logic, and call in6_selectsrc * and in6_pcbsetport in order to fill in the local address * and the local port. 
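         * (in6_selectsrc_socket() below chooses the source address
         * unless the destination is an IPv4-mapped address, in which
         * case the PCB's current local address is used.)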
*/ if (sin6->sin6_port == 0) { error = EADDRNOTAVAIL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { /* how about ::ffff:0.0.0.0 case? */ error = EISCONN; goto release; } fport = sin6->sin6_port; /* allow 0 port */ if (IN6_IS_ADDR_V4MAPPED(faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * I believe we should explicitly discard the * packet when mapped addresses are disabled, * rather than send the packet as an IPv6 one. * If we chose the latter approach, the packet * might be sent out on the wire based on the * default route, the situation which we'd * probably want to avoid. * (20010421 jinmei@kame.net) */ error = EINVAL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { /* * when remote addr is an IPv4-mapped address, * local addr should not be an IPv6 address, * since you cannot determine how to map IPv6 * source address to IPv4. */ error = EINVAL; goto release; } af = AF_INET; } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { error = in6_selectsrc_socket(sin6, optp, inp, td->td_ucred, scope_ambiguous, &in6a, NULL); if (error) goto release; laddr = &in6a; } else laddr = &inp->in6p_laddr; /* XXX */ if (laddr == NULL) { if (error == 0) error = EADDRNOTAVAIL; goto release; } if (inp->inp_lport == 0 && (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) { /* Undo an address bind that may have occurred. */ inp->in6p_laddr = in6addr_any; goto release; } } else { if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto release; } if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * XXX: this case would happen when the * application sets the V6ONLY flag after * connecting the foreign address. * Such applications should be fixed, * so we bark here. */ log(LOG_INFO, "udp6_output: IPV6_V6ONLY " "option was set for a connected socket\n"); error = EINVAL; goto release; } else af = AF_INET; } laddr = &inp->in6p_laddr; faddr = &inp->in6p_faddr; fport = inp->inp_fport; } if (af == AF_INET) hlen = sizeof(struct ip); /* * Calculate data length and get a mbuf * for UDP and IP6 headers. */ M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto release; } /* * Stuff checksum and output datagram. */ udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; if (nxt == IPPROTO_UDPLITE) { struct udpcb *up; up = intoudpcb(inp); cscov = up->u_txcslen; if (cscov >= plen) cscov = 0; udp6->uh_ulen = htons(cscov); /* * For UDP-Lite, checksum coverage length of zero means * the entire UDPLite packet is covered by the checksum. */ cscov_partial = (cscov == 0) ? 
0 : 1; } else if (plen <= 0xffff) udp6->uh_ulen = htons((u_short)plen); else udp6->uh_ulen = 0; udp6->uh_sum = 0; switch (af) { case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_plen = htons((u_short)plen); ip6->ip6_nxt = nxt; ip6->ip6_hlim = in6_selecthlim(inp, NULL); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; if (cscov_partial) { if ((udp6->uh_sum = in6_cksum_partial(m, nxt, sizeof(struct ip6_hdr), plen, cscov)) == 0) udp6->uh_sum = 0xffff; } else { udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0); m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } #ifdef RSS { uint32_t hash_val, hash_type; uint8_t pr; pr = inp->inp_socket->so_proto->pr_protocol; /* * Calculate an appropriate RSS hash for UDP and * UDP Lite. * * The called function will take care of figuring out * whether a 2-tuple or 4-tuple hash is required based * on the currently configured scheme. * * Later later on connected socket values should be * cached in the inpcb and reused, rather than constantly * re-calculating it. * * UDP Lite is a different protocol number and will * likely end up being hashed as a 2-tuple until * RSS / NICs grow UDP Lite protocol awareness. */ if (rss_proto_software_hash_v6(faddr, laddr, fport, inp->inp_lport, pr, &hash_val, &hash_type) == 0) { m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); } } #endif flags = 0; #ifdef RSS /* * Don't override with the inp cached flowid. * * Until the whole UDP path is vetted, it may actually * be incorrect. */ flags |= IP_NODEFAULTFLOWID; #endif UDP_PROBE(send, NULL, inp, ip6, inp, udp6); UDPSTAT_INC(udps_opackets); error = ip6_output(m, optp, &inp->inp_route6, flags, inp->in6p_moptions, NULL, inp); break; case AF_INET: error = EAFNOSUPPORT; goto release; } goto releaseopt; release: m_freem(m); releaseopt: if (control) { ip6_clearpktopts(&opt, -1); m_freem(control); } return (error); } static void udp6_abort(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_abort: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_abort)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp == NULL, ("udp6_attach: inp != NULL")); if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { error = soreserve(so, udp_sendspace, udp_recvspace); if (error) return (error); } INP_INFO_WLOCK(pcbinfo); error = in_pcballoc(so, pcbinfo); if (error) { INP_INFO_WUNLOCK(pcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; inp->in6p_hops = -1; /* use kernel default */ inp->in6p_cksum = -1; /* just to be sure */ /* * XXX: ugly!! 
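The UDP-Lite transmit path above stores the checksum coverage (u_txcslen) in uh_ulen, treating a coverage of zero, or one at least as large as the datagram, as "checksum the whole packet" (cscov_partial = 0). A hedged userland sketch of requesting partial coverage; the UDPLITE_SEND_CSCOV/UDPLITE_RECV_CSCOV option names are assumed from <netinet/udplite.h>.

	/*
	 * Hedged sketch: partial checksum coverage on a UDP-Lite v6
	 * socket, matching the cscov/cscov_partial logic in
	 * udp6_output() above.  Coverage counts from the start of the
	 * UDP-Lite header per RFC 3828; 0 means full coverage.
	 */
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <netinet/udplite.h>

	int
	udplite6_partial_cksum_socket(void)
	{
		int s, cscov;

		s = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDPLITE);
		if (s == -1)
			return (-1);
		cscov = 20;	/* checksum only the first 20 bytes */
		(void)setsockopt(s, IPPROTO_UDPLITE, UDPLITE_SEND_CSCOV,
		    &cscov, sizeof(cscov));
		cscov = 0;	/* accept only fully covered datagrams */
		(void)setsockopt(s, IPPROTO_UDPLITE, UDPLITE_RECV_CSCOV,
		    &cscov, sizeof(cscov));
		return (s);
	}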
* IPv4 TTL initialization is necessary for an IPv6 socket as well, * because the socket may be bound to an IPv6 wildcard address, * which may match an IPv4-mapped IPv6 address. */ inp->inp_ip_ttl = V_ip_defttl; error = udp_newudpcb(inp); if (error) { in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); return (error); } INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); return (0); } static int udp6_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_bind: inp == NULL")); INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { struct sockaddr_in6 *sin6_p; sin6_p = (struct sockaddr_in6 *)nam; if (IN6_IS_ADDR_UNSPECIFIED(&sin6_p->sin6_addr)) inp->inp_vflag |= INP_IPV4; #ifdef INET else if (IN6_IS_ADDR_V4MAPPED(&sin6_p->sin6_addr)) { struct sockaddr_in sin; in6_sin6_2_sin(&sin, sin6_p); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = in_pcbbind(inp, (struct sockaddr *)&sin, td->td_ucred); goto out; } #endif } error = in6_pcbbind(inp, nam, td->td_ucred); #ifdef INET out: #endif INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); } static void udp6_close(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_close: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (*pru->pru_disconnect)(so); return; } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); soisdisconnected(so); } INP_WUNLOCK(inp); } static int udp6_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct sockaddr_in6 *sin6; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); sin6 = (struct sockaddr_in6 *)nam; KASSERT(inp != NULL, ("udp6_connect: inp == NULL")); /* * XXXRW: Need to clarify locking of v4/v6 flags. 
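udp6_bind() above drives the dual-stack PCB flags: a wildcard bind with IN6P_IPV6_V6ONLY clear keeps INP_IPV4 set, while a v4-mapped bind is converted with in6_sin6_2_sin() and handed to in_pcbbind(). A minimal sketch of the userland view of the wildcard case:

	/*
	 * With IPV6_V6ONLY cleared, binding :: leaves the socket able
	 * to receive IPv4 traffic presented as v4-mapped addresses
	 * (INP_IPV4 stays set in the PCB, per udp6_bind() above).
	 */
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <string.h>

	int
	bind_udp6_dualstack(in_port_t port)
	{
		struct sockaddr_in6 sin6;
		int s, off = 0;

		s = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
		if (s == -1)
			return (-1);
		(void)setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
		    &off, sizeof(off));
		memset(&sin6, 0, sizeof(sin6));
		sin6.sin6_family = AF_INET6;
		sin6.sin6_len = sizeof(sin6);
		sin6.sin6_addr = in6addr_any;	/* wildcard: both families */
		sin6.sin6_port = htons(port);
		if (bind(s, (struct sockaddr *)&sin6, sizeof(sin6)) == -1)
			return (-1);
		return (s);
	}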
*/ INP_WLOCK(inp); #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { struct sockaddr_in sin; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) { error = EINVAL; goto out; } if ((inp->inp_vflag & INP_IPV4) == 0) { error = EAFNOSUPPORT; goto out; } if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto out; } in6_sin6_2_sin(&sin, sin6); inp->inp_vflag |= INP_IPV4; inp->inp_vflag &= ~INP_IPV6; error = prison_remote_ip4(td->td_ucred, &sin.sin_addr); if (error != 0) goto out; INP_HASH_WLOCK(pcbinfo); error = in_pcbconnect(inp, (struct sockaddr *)&sin, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); goto out; } else { if ((inp->inp_vflag & INP_IPV6) == 0) { error = EAFNOSUPPORT; goto out; } } #endif if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = EISCONN; goto out; } inp->inp_vflag &= ~INP_IPV4; inp->inp_vflag |= INP_IPV6; error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr); if (error != 0) goto out; INP_HASH_WLOCK(pcbinfo); error = in6_pcbconnect(inp, nam, td->td_ucred); INP_HASH_WUNLOCK(pcbinfo); if (error == 0) soisconnected(so); out: INP_WUNLOCK(inp); return (error); } static void udp6_detach(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; struct udpcb *up; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_detach: inp == NULL")); INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); up = intoudpcb(inp); KASSERT(up != NULL, ("%s: up == NULL", __func__)); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(pcbinfo); udp_discardcb(up); } static int udp6_disconnect(struct socket *so) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_disconnect: inp == NULL")); INP_WLOCK(inp); #ifdef INET if (inp->inp_vflag & INP_IPV4) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; INP_WUNLOCK(inp); pru = inetsw[ip_protox[nxt]].pr_usrreqs; (void)(*pru->pru_disconnect)(so); return (0); } #endif if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto out; } INP_HASH_WLOCK(pcbinfo); in6_pcbdisconnect(inp); inp->in6p_laddr = in6addr_any; INP_HASH_WUNLOCK(pcbinfo); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; /* XXX */ SOCK_UNLOCK(so); out: INP_WUNLOCK(inp); return (0); } static int udp6_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { struct inpcb *inp; struct inpcbinfo *pcbinfo; int error = 0; pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol); inp = sotoinpcb(so); KASSERT(inp != NULL, ("udp6_send: inp == NULL")); INP_WLOCK(inp); if (addr) { if (addr->sa_len != sizeof(struct sockaddr_in6)) { error = EINVAL; goto bad; } if (addr->sa_family != AF_INET6) { error = EAFNOSUPPORT; goto bad; } } #ifdef INET if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) { int hasv4addr; struct sockaddr_in6 *sin6 = NULL; if (addr == NULL) hasv4addr = (inp->inp_vflag & INP_IPV4); else { sin6 = (struct sockaddr_in6 *)addr; hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ? 1 : 0; } if (hasv4addr) { struct pr_usrreqs *pru; uint8_t nxt; nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; /* * XXXRW: We release UDP-layer locks before calling * udp_send() in order to avoid recursion. However, * this does mean there is a short window where inp's * fields are unstable. 
Could this lead to a * potential race in which the factors causing us to * select the UDPv4 output routine are invalidated? */ INP_WUNLOCK(inp); if (sin6) in6_sin6_2_sin_in_sock(addr); pru = inetsw[ip_protox[nxt]].pr_usrreqs; /* addr will just be freed in sendit(). */ return ((*pru->pru_send)(so, flags, m, addr, control, td)); } } #endif #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif INP_HASH_WLOCK(pcbinfo); error = udp6_output(inp, m, addr, control, td); INP_HASH_WUNLOCK(pcbinfo); INP_WUNLOCK(inp); return (error); bad: INP_WUNLOCK(inp); m_freem(m); return (error); } struct pr_usrreqs udp6_usrreqs = { .pru_abort = udp6_abort, .pru_attach = udp6_attach, .pru_bind = udp6_bind, .pru_connect = udp6_connect, .pru_control = in6_control, .pru_detach = udp6_detach, .pru_disconnect = udp6_disconnect, .pru_peeraddr = in6_mapped_peeraddr, .pru_send = udp6_send, .pru_shutdown = udp_shutdown, .pru_sockaddr = in6_mapped_sockaddr, .pru_soreceive = soreceive_dgram, .pru_sosend = sosend_dgram, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = udp6_close }; Index: projects/clang500-import/sys/x86/x86/identcpu.c =================================================================== --- projects/clang500-import/sys/x86/x86/identcpu.c (revision 319250) +++ projects/clang500-import/sys/x86/x86/identcpu.c (revision 319251) @@ -1,2414 +1,2414 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * Copyright (c) 1997 KATO Takenori. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: Id: machdep.c,v 1.193 1996/06/18 01:22:04 bde Exp */ #include __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __i386__ #define IDENTBLUE_CYRIX486 0 #define IDENTBLUE_IBMCPU 1 #define IDENTBLUE_CYRIXM2 2 static void identifycyrix(void); static void print_transmeta_info(void); #endif static u_int find_cpu_vendor_id(void); static void print_AMD_info(void); static void print_INTEL_info(void); static void print_INTEL_TLB(u_int data); static void print_hypervisor_info(void); static void print_svm_info(void); static void print_via_padlock_info(void); static void print_vmx_info(void); #ifdef __i386__ int cpu; /* Are we 386, 386sx, 486, etc? */ int cpu_class; #endif u_int cpu_feature; /* Feature flags */ u_int cpu_feature2; /* Feature flags */ u_int amd_feature; /* AMD feature flags */ u_int amd_feature2; /* AMD feature flags */ u_int amd_pminfo; /* AMD advanced power management info */ u_int via_feature_rng; /* VIA RNG features */ u_int via_feature_xcrypt; /* VIA ACE features */ u_int cpu_high; /* Highest arg to CPUID */ u_int cpu_exthigh; /* Highest arg to extended CPUID */ u_int cpu_id; /* Stepping ID */ u_int cpu_procinfo; /* HyperThreading Info / Brand Index / CLFUSH */ u_int cpu_procinfo2; /* Multicore info */ char cpu_vendor[20]; /* CPU Origin code */ u_int cpu_vendor_id; /* CPU vendor ID */ u_int cpu_fxsr; /* SSE enabled */ u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */ u_int cpu_clflush_line_size = 32; u_int cpu_stdext_feature; u_int cpu_stdext_feature2; u_int cpu_max_ext_state_size; u_int cpu_mon_mwait_flags; /* MONITOR/MWAIT flags (CPUID.05H.ECX) */ u_int cpu_mon_min_size; /* MONITOR minimum range size, bytes */ u_int cpu_mon_max_size; /* MONITOR minimum range size, bytes */ u_int cpu_maxphyaddr; /* Max phys addr width in bits */ char machine[] = MACHINE; SYSCTL_UINT(_hw, OID_AUTO, via_feature_rng, CTLFLAG_RD, &via_feature_rng, 0, "VIA RNG feature available in CPU"); SYSCTL_UINT(_hw, OID_AUTO, via_feature_xcrypt, CTLFLAG_RD, &via_feature_xcrypt, 0, "VIA xcrypt feature available in CPU"); #ifdef __amd64__ #ifdef SCTL_MASK32 extern int adaptive_machine_arch; #endif static int sysctl_hw_machine(SYSCTL_HANDLER_ARGS) { #ifdef SCTL_MASK32 static const char machine32[] = "i386"; #endif int error; #ifdef SCTL_MASK32 if ((req->flags & SCTL_MASK32) != 0 && adaptive_machine_arch) error = SYSCTL_OUT(req, machine32, sizeof(machine32)); else #endif error = SYSCTL_OUT(req, machine, sizeof(machine)); return (error); } SYSCTL_PROC(_hw, HW_MACHINE, machine, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_machine, "A", "Machine class"); #else SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); #endif static char cpu_model[128]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD | CTLFLAG_MPSAFE, cpu_model, 0, "Machine model"); static int hw_clockrate; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &hw_clockrate, 0, "CPU instruction clock rate"); u_int hv_high; char hv_vendor[16]; SYSCTL_STRING(_hw, OID_AUTO, hv_vendor, CTLFLAG_RD | CTLFLAG_MPSAFE, hv_vendor, 0, "Hypervisor vendor"); static eventhandler_tag tsc_post_tag; static char cpu_brand[48]; #ifdef __i386__ #define MAX_BRAND_INDEX 8 static const char *cpu_brandtable[MAX_BRAND_INDEX + 1] = { NULL, /* No brand */ "Intel Celeron", "Intel Pentium III", "Intel Pentium III Xeon", NULL, NULL, NULL, NULL, "Intel Pentium 4" 
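sysctl_hw_machine() above lets a 32-bit request on amd64 (SCTL_MASK32 with adaptive_machine_arch) see "i386" instead of the native MACHINE string. A trivial consumer:

	/* Minimal consumer of the hw.machine handler defined above. */
	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		char buf[64];
		size_t len = sizeof(buf);

		if (sysctlbyname("hw.machine", buf, &len, NULL, 0) == -1) {
			perror("sysctlbyname");
			return (1);
		}
		/* "amd64" natively; "i386" for 32-bit callers on amd64. */
		printf("hw.machine: %s\n", buf);
		return (0);
	}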
}; static struct { char *cpu_name; int cpu_class; } cpus[] = { { "Intel 80286", CPUCLASS_286 }, /* CPU_286 */ { "i386SX", CPUCLASS_386 }, /* CPU_386SX */ { "i386DX", CPUCLASS_386 }, /* CPU_386 */ { "i486SX", CPUCLASS_486 }, /* CPU_486SX */ { "i486DX", CPUCLASS_486 }, /* CPU_486 */ { "Pentium", CPUCLASS_586 }, /* CPU_586 */ { "Cyrix 486", CPUCLASS_486 }, /* CPU_486DLC */ { "Pentium Pro", CPUCLASS_686 }, /* CPU_686 */ { "Cyrix 5x86", CPUCLASS_486 }, /* CPU_M1SC */ { "Cyrix 6x86", CPUCLASS_486 }, /* CPU_M1 */ { "Blue Lightning", CPUCLASS_486 }, /* CPU_BLUE */ { "Cyrix 6x86MX", CPUCLASS_686 }, /* CPU_M2 */ { "NexGen 586", CPUCLASS_386 }, /* CPU_NX586 (XXX) */ { "Cyrix 486S/DX", CPUCLASS_486 }, /* CPU_CY486DX */ { "Pentium II", CPUCLASS_686 }, /* CPU_PII */ { "Pentium III", CPUCLASS_686 }, /* CPU_PIII */ { "Pentium 4", CPUCLASS_686 }, /* CPU_P4 */ }; #endif static struct { char *vendor; u_int vendor_id; } cpu_vendors[] = { { INTEL_VENDOR_ID, CPU_VENDOR_INTEL }, /* GenuineIntel */ { AMD_VENDOR_ID, CPU_VENDOR_AMD }, /* AuthenticAMD */ { CENTAUR_VENDOR_ID, CPU_VENDOR_CENTAUR }, /* CentaurHauls */ #ifdef __i386__ { NSC_VENDOR_ID, CPU_VENDOR_NSC }, /* Geode by NSC */ { CYRIX_VENDOR_ID, CPU_VENDOR_CYRIX }, /* CyrixInstead */ { TRANSMETA_VENDOR_ID, CPU_VENDOR_TRANSMETA }, /* GenuineTMx86 */ { SIS_VENDOR_ID, CPU_VENDOR_SIS }, /* SiS SiS SiS */ { UMC_VENDOR_ID, CPU_VENDOR_UMC }, /* UMC UMC UMC */ { NEXGEN_VENDOR_ID, CPU_VENDOR_NEXGEN }, /* NexGenDriven */ { RISE_VENDOR_ID, CPU_VENDOR_RISE }, /* RiseRiseRise */ #if 0 /* XXX CPUID 8000_0000h and 8086_0000h, not 0000_0000h */ { "TransmetaCPU", CPU_VENDOR_TRANSMETA }, #endif #endif }; void printcpuinfo(void) { u_int regs[4], i; char *brand; printf("CPU: "); #ifdef __i386__ cpu_class = cpus[cpu].cpu_class; strncpy(cpu_model, cpus[cpu].cpu_name, sizeof (cpu_model)); #else strncpy(cpu_model, "Hammer", sizeof (cpu_model)); #endif /* Check for extended CPUID information and a processor name. */ if (cpu_exthigh >= 0x80000004) { brand = cpu_brand; for (i = 0x80000002; i < 0x80000005; i++) { do_cpuid(i, regs); memcpy(brand, regs, sizeof(regs)); brand += sizeof(regs); } } switch (cpu_vendor_id) { case CPU_VENDOR_INTEL: #ifdef __i386__ if ((cpu_id & 0xf00) > 0x300) { u_int brand_index; cpu_model[0] = '\0'; switch (cpu_id & 0x3000) { case 0x1000: strcpy(cpu_model, "Overdrive "); break; case 0x2000: strcpy(cpu_model, "Dual "); break; } switch (cpu_id & 0xf00) { case 0x400: strcat(cpu_model, "i486 "); /* Check the particular flavor of 486 */ switch (cpu_id & 0xf0) { case 0x00: case 0x10: strcat(cpu_model, "DX"); break; case 0x20: strcat(cpu_model, "SX"); break; case 0x30: strcat(cpu_model, "DX2"); break; case 0x40: strcat(cpu_model, "SL"); break; case 0x50: strcat(cpu_model, "SX2"); break; case 0x70: strcat(cpu_model, "DX2 Write-Back Enhanced"); break; case 0x80: strcat(cpu_model, "DX4"); break; } break; case 0x500: /* Check the particular flavor of 586 */ strcat(cpu_model, "Pentium"); switch (cpu_id & 0xf0) { case 0x00: strcat(cpu_model, " A-step"); break; case 0x10: strcat(cpu_model, "/P5"); break; case 0x20: strcat(cpu_model, "/P54C"); break; case 0x30: strcat(cpu_model, "/P24T"); break; case 0x40: strcat(cpu_model, "/P55C"); break; case 0x70: strcat(cpu_model, "/P54C"); break; case 0x80: strcat(cpu_model, "/P55C (quarter-micron)"); break; default: /* nothing */ break; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) /* * XXX - If/when Intel fixes the bug, this * should also check the version of the * CPU, not just that it's a Pentium. 
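printcpuinfo() assembles the 48-byte processor brand string from extended CPUID leaves 0x80000002 through 0x80000004, 16 bytes (EAX..EDX) per leaf. The same probe from userland, using the __get_cpuid() helper from GCC/Clang's <cpuid.h> as a stand-in for the kernel's do_cpuid():

	/* Userland analogue of the brand-string loop in printcpuinfo(). */
	#include <cpuid.h>
	#include <stdio.h>
	#include <string.h>

	int
	main(void)
	{
		unsigned int regs[4], i;
		char brand[49];

		if (__get_cpuid(0x80000000, &regs[0], &regs[1], &regs[2],
		    &regs[3]) == 0 || regs[0] < 0x80000004)
			return (1);	/* no brand string available */
		for (i = 0; i < 3; i++) {
			__get_cpuid(0x80000002 + i, &regs[0], &regs[1],
			    &regs[2], &regs[3]);
			memcpy(brand + i * 16, regs, 16);
		}
		brand[48] = '\0';
		printf("%s\n", brand);
		return (0);
	}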
*/ has_f00f_bug = 1; #endif break; case 0x600: /* Check the particular flavor of 686 */ switch (cpu_id & 0xf0) { case 0x00: strcat(cpu_model, "Pentium Pro A-step"); break; case 0x10: strcat(cpu_model, "Pentium Pro"); break; case 0x30: case 0x50: case 0x60: strcat(cpu_model, "Pentium II/Pentium II Xeon/Celeron"); cpu = CPU_PII; break; case 0x70: case 0x80: case 0xa0: case 0xb0: strcat(cpu_model, "Pentium III/Pentium III Xeon/Celeron"); cpu = CPU_PIII; break; default: strcat(cpu_model, "Unknown 80686"); break; } break; case 0xf00: strcat(cpu_model, "Pentium 4"); cpu = CPU_P4; break; default: strcat(cpu_model, "unknown"); break; } /* * If we didn't get a brand name from the extended * CPUID, try to look it up in the brand table. */ if (cpu_high > 0 && *cpu_brand == '\0') { brand_index = cpu_procinfo & CPUID_BRAND_INDEX; if (brand_index <= MAX_BRAND_INDEX && cpu_brandtable[brand_index] != NULL) strcpy(cpu_brand, cpu_brandtable[brand_index]); } } #else /* Please make up your mind folks! */ strcat(cpu_model, "EM64T"); #endif break; case CPU_VENDOR_AMD: /* * Values taken from AMD Processor Recognition * http://www.amd.com/K6/k6docs/pdf/20734g.pdf * (also describes ``Features'' encodings. */ strcpy(cpu_model, "AMD "); #ifdef __i386__ switch (cpu_id & 0xFF0) { case 0x410: strcat(cpu_model, "Standard Am486DX"); break; case 0x430: strcat(cpu_model, "Enhanced Am486DX2 Write-Through"); break; case 0x470: strcat(cpu_model, "Enhanced Am486DX2 Write-Back"); break; case 0x480: strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Through"); break; case 0x490: strcat(cpu_model, "Enhanced Am486DX4/Am5x86 Write-Back"); break; case 0x4E0: strcat(cpu_model, "Am5x86 Write-Through"); break; case 0x4F0: strcat(cpu_model, "Am5x86 Write-Back"); break; case 0x500: strcat(cpu_model, "K5 model 0"); break; case 0x510: strcat(cpu_model, "K5 model 1"); break; case 0x520: strcat(cpu_model, "K5 PR166 (model 2)"); break; case 0x530: strcat(cpu_model, "K5 PR200 (model 3)"); break; case 0x560: strcat(cpu_model, "K6"); break; case 0x570: strcat(cpu_model, "K6 266 (model 1)"); break; case 0x580: strcat(cpu_model, "K6-2"); break; case 0x590: strcat(cpu_model, "K6-III"); break; case 0x5a0: strcat(cpu_model, "Geode LX"); break; default: strcat(cpu_model, "Unknown"); break; } #else if ((cpu_id & 0xf00) == 0xf00) strcat(cpu_model, "AMD64 Processor"); else strcat(cpu_model, "Unknown"); #endif break; #ifdef __i386__ case CPU_VENDOR_CYRIX: strcpy(cpu_model, "Cyrix "); switch (cpu_id & 0xff0) { case 0x440: strcat(cpu_model, "MediaGX"); break; case 0x520: strcat(cpu_model, "6x86"); break; case 0x540: cpu_class = CPUCLASS_586; strcat(cpu_model, "GXm"); break; case 0x600: strcat(cpu_model, "6x86MX"); break; default: /* * Even though CPU supports the cpuid * instruction, it can be disabled. * Therefore, this routine supports all Cyrix * CPUs. 
*/ switch (cyrix_did & 0xf0) { case 0x00: switch (cyrix_did & 0x0f) { case 0x00: strcat(cpu_model, "486SLC"); break; case 0x01: strcat(cpu_model, "486DLC"); break; case 0x02: strcat(cpu_model, "486SLC2"); break; case 0x03: strcat(cpu_model, "486DLC2"); break; case 0x04: strcat(cpu_model, "486SRx"); break; case 0x05: strcat(cpu_model, "486DRx"); break; case 0x06: strcat(cpu_model, "486SRx2"); break; case 0x07: strcat(cpu_model, "486DRx2"); break; case 0x08: strcat(cpu_model, "486SRu"); break; case 0x09: strcat(cpu_model, "486DRu"); break; case 0x0a: strcat(cpu_model, "486SRu2"); break; case 0x0b: strcat(cpu_model, "486DRu2"); break; default: strcat(cpu_model, "Unknown"); break; } break; case 0x10: switch (cyrix_did & 0x0f) { case 0x00: strcat(cpu_model, "486S"); break; case 0x01: strcat(cpu_model, "486S2"); break; case 0x02: strcat(cpu_model, "486Se"); break; case 0x03: strcat(cpu_model, "486S2e"); break; case 0x0a: strcat(cpu_model, "486DX"); break; case 0x0b: strcat(cpu_model, "486DX2"); break; case 0x0f: strcat(cpu_model, "486DX4"); break; default: strcat(cpu_model, "Unknown"); break; } break; case 0x20: if ((cyrix_did & 0x0f) < 8) strcat(cpu_model, "6x86"); /* Where did you get it? */ else strcat(cpu_model, "5x86"); break; case 0x30: strcat(cpu_model, "6x86"); break; case 0x40: if ((cyrix_did & 0xf000) == 0x3000) { cpu_class = CPUCLASS_586; strcat(cpu_model, "GXm"); } else strcat(cpu_model, "MediaGX"); break; case 0x50: strcat(cpu_model, "6x86MX"); break; case 0xf0: switch (cyrix_did & 0x0f) { case 0x0d: strcat(cpu_model, "Overdrive CPU"); break; case 0x0e: strcpy(cpu_model, "Texas Instruments 486SXL"); break; case 0x0f: strcat(cpu_model, "486SLC/DLC"); break; default: strcat(cpu_model, "Unknown"); break; } break; default: strcat(cpu_model, "Unknown"); break; } break; } break; case CPU_VENDOR_RISE: strcpy(cpu_model, "Rise "); switch (cpu_id & 0xff0) { case 0x500: /* 6401 and 6441 (Kirin) */ case 0x520: /* 6510 (Lynx) */ strcat(cpu_model, "mP6"); break; default: strcat(cpu_model, "Unknown"); } break; #endif case CPU_VENDOR_CENTAUR: #ifdef __i386__ switch (cpu_id & 0xff0) { case 0x540: strcpy(cpu_model, "IDT WinChip C6"); break; case 0x580: strcpy(cpu_model, "IDT WinChip 2"); break; case 0x590: strcpy(cpu_model, "IDT WinChip 3"); break; case 0x660: strcpy(cpu_model, "VIA C3 Samuel"); break; case 0x670: if (cpu_id & 0x8) strcpy(cpu_model, "VIA C3 Ezra"); else strcpy(cpu_model, "VIA C3 Samuel 2"); break; case 0x680: strcpy(cpu_model, "VIA C3 Ezra-T"); break; case 0x690: strcpy(cpu_model, "VIA C3 Nehemiah"); break; case 0x6a0: case 0x6d0: strcpy(cpu_model, "VIA C7 Esther"); break; case 0x6f0: strcpy(cpu_model, "VIA Nano"); break; default: strcpy(cpu_model, "VIA/IDT Unknown"); } #else strcpy(cpu_model, "VIA "); if ((cpu_id & 0xff0) == 0x6f0) strcat(cpu_model, "Nano Processor"); else strcat(cpu_model, "Unknown"); #endif break; #ifdef __i386__ case CPU_VENDOR_IBM: strcpy(cpu_model, "Blue Lightning CPU"); break; case CPU_VENDOR_NSC: switch (cpu_id & 0xff0) { case 0x540: strcpy(cpu_model, "Geode SC1100"); cpu = CPU_GEODE1100; break; default: strcpy(cpu_model, "Geode/NSC unknown"); break; } break; #endif default: strcat(cpu_model, "Unknown"); break; } /* * Replace cpu_model with cpu_brand minus leading spaces if * we have one. 
*/ brand = cpu_brand; while (*brand == ' ') ++brand; if (*brand != '\0') strcpy(cpu_model, brand); printf("%s (", cpu_model); if (tsc_freq != 0) { hw_clockrate = (tsc_freq + 5000) / 1000000; printf("%jd.%02d-MHz ", (intmax_t)(tsc_freq + 4999) / 1000000, (u_int)((tsc_freq + 4999) / 10000) % 100); } #ifdef __i386__ switch(cpu_class) { case CPUCLASS_286: printf("286"); break; case CPUCLASS_386: printf("386"); break; #if defined(I486_CPU) case CPUCLASS_486: printf("486"); break; #endif #if defined(I586_CPU) case CPUCLASS_586: printf("586"); break; #endif #if defined(I686_CPU) case CPUCLASS_686: printf("686"); break; #endif default: printf("Unknown"); /* will panic below... */ } #else printf("K8"); #endif printf("-class CPU)\n"); if (*cpu_vendor) printf(" Origin=\"%s\"", cpu_vendor); if (cpu_id) printf(" Id=0x%x", cpu_id); if (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_CENTAUR || #ifdef __i386__ cpu_vendor_id == CPU_VENDOR_TRANSMETA || cpu_vendor_id == CPU_VENDOR_RISE || cpu_vendor_id == CPU_VENDOR_NSC || (cpu_vendor_id == CPU_VENDOR_CYRIX && ((cpu_id & 0xf00) > 0x500)) || #endif 0) { printf(" Family=0x%x", CPUID_TO_FAMILY(cpu_id)); printf(" Model=0x%x", CPUID_TO_MODEL(cpu_id)); printf(" Stepping=%u", cpu_id & CPUID_STEPPING); #ifdef __i386__ if (cpu_vendor_id == CPU_VENDOR_CYRIX) printf("\n DIR=0x%04x", cyrix_did); #endif /* * AMD CPUID Specification * http://support.amd.com/us/Embedded_TechDocs/25481.pdf * * Intel Processor Identification and CPUID Instruction * http://www.intel.com/assets/pdf/appnote/241618.pdf */ if (cpu_high > 0) { /* * Here we should probably set up flags indicating * whether or not various features are available. * The interesting ones are probably VME, PSE, PAE, * and PGE. The code already assumes without bothering * to check that all CPUs >= Pentium have a TSC and * MSRs. 
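The Family=/Model=/Stepping= values printed above decode the CPUID leaf-1 signature in cpu_id; the CPUID_TO_FAMILY()/CPUID_TO_MODEL() macros fold in the extended fields. A self-contained sketch of that standard decoding (the extended family is added when the base family is 0xf; the extended model is prepended for families 0x6 and 0xf):

	/* Standard decoding of the CPUID leaf-1 EAX signature. */
	#include <stdio.h>

	static unsigned int
	cpuid_family(unsigned int id)
	{
		unsigned int fam = (id >> 8) & 0xf;

		if (fam == 0xf)			/* extended family is additive */
			fam += (id >> 20) & 0xff;
		return (fam);
	}

	static unsigned int
	cpuid_model(unsigned int id)
	{
		unsigned int fam = (id >> 8) & 0xf;
		unsigned int mod = (id >> 4) & 0xf;

		if (fam == 0x6 || fam == 0xf)	/* extended model prepended */
			mod |= ((id >> 16) & 0xf) << 4;
		return (mod);
	}

	int
	main(void)
	{
		unsigned int id = 0x000306c3;	/* example: family 6, model 0x3c */

		printf("Family=0x%x Model=0x%x Stepping=%u\n",
		    cpuid_family(id), cpuid_model(id), id & 0xf);
		return (0);
	}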
*/ printf("\n Features=0x%b", cpu_feature, "\020" "\001FPU" /* Integral FPU */ "\002VME" /* Extended VM86 mode support */ "\003DE" /* Debugging Extensions (CR4.DE) */ "\004PSE" /* 4MByte page tables */ "\005TSC" /* Timestamp counter */ "\006MSR" /* Machine specific registers */ "\007PAE" /* Physical address extension */ "\010MCE" /* Machine Check support */ "\011CX8" /* CMPEXCH8 instruction */ "\012APIC" /* SMP local APIC */ "\013oldMTRR" /* Previous implementation of MTRR */ "\014SEP" /* Fast System Call */ "\015MTRR" /* Memory Type Range Registers */ "\016PGE" /* PG_G (global bit) support */ "\017MCA" /* Machine Check Architecture */ "\020CMOV" /* CMOV instruction */ "\021PAT" /* Page attributes table */ "\022PSE36" /* 36 bit address space support */ "\023PN" /* Processor Serial number */ "\024CLFLUSH" /* Has the CLFLUSH instruction */ "\025" "\026DTS" /* Debug Trace Store */ "\027ACPI" /* ACPI support */ "\030MMX" /* MMX instructions */ "\031FXSR" /* FXSAVE/FXRSTOR */ "\032SSE" /* Streaming SIMD Extensions */ "\033SSE2" /* Streaming SIMD Extensions #2 */ "\034SS" /* Self snoop */ "\035HTT" /* Hyperthreading (see EBX bit 16-23) */ "\036TM" /* Thermal Monitor clock slowdown */ "\037IA64" /* CPU can execute IA64 instructions */ "\040PBE" /* Pending Break Enable */ ); if (cpu_feature2 != 0) { printf("\n Features2=0x%b", cpu_feature2, "\020" "\001SSE3" /* SSE3 */ "\002PCLMULQDQ" /* Carry-Less Mul Quadword */ "\003DTES64" /* 64-bit Debug Trace */ "\004MON" /* MONITOR/MWAIT Instructions */ "\005DS_CPL" /* CPL Qualified Debug Store */ "\006VMX" /* Virtual Machine Extensions */ "\007SMX" /* Safer Mode Extensions */ "\010EST" /* Enhanced SpeedStep */ "\011TM2" /* Thermal Monitor 2 */ "\012SSSE3" /* SSSE3 */ "\013CNXT-ID" /* L1 context ID available */ "\014SDBG" /* IA32 silicon debug */ "\015FMA" /* Fused Multiply Add */ "\016CX16" /* CMPXCHG16B Instruction */ "\017xTPR" /* Send Task Priority Messages*/ "\020PDCM" /* Perf/Debug Capability MSR */ "\021" "\022PCID" /* Process-context Identifiers*/ "\023DCA" /* Direct Cache Access */ "\024SSE4.1" /* SSE 4.1 */ "\025SSE4.2" /* SSE 4.2 */ "\026x2APIC" /* xAPIC Extensions */ "\027MOVBE" /* MOVBE Instruction */ "\030POPCNT" /* POPCNT Instruction */ "\031TSCDLT" /* TSC-Deadline Timer */ "\032AESNI" /* AES Crypto */ "\033XSAVE" /* XSAVE/XRSTOR States */ "\034OSXSAVE" /* OS-Enabled State Management*/ "\035AVX" /* Advanced Vector Extensions */ "\036F16C" /* Half-precision conversions */ "\037RDRAND" /* RDRAND Instruction */ "\040HV" /* Hypervisor */ ); } if (amd_feature != 0) { printf("\n AMD Features=0x%b", amd_feature, "\020" /* in hex */ "\001" /* Same */ "\002" /* Same */ "\003" /* Same */ "\004" /* Same */ "\005" /* Same */ "\006" /* Same */ "\007" /* Same */ "\010" /* Same */ "\011" /* Same */ "\012" /* Same */ "\013" /* Undefined */ "\014SYSCALL" /* Have SYSCALL/SYSRET */ "\015" /* Same */ "\016" /* Same */ "\017" /* Same */ "\020" /* Same */ "\021" /* Same */ "\022" /* Same */ "\023" /* Reserved, unknown */ "\024MP" /* Multiprocessor Capable */ "\025NX" /* Has EFER.NXE, NX */ "\026" /* Undefined */ "\027MMX+" /* AMD MMX Extensions */ "\030" /* Same */ "\031" /* Same */ "\032FFXSR" /* Fast FXSAVE/FXRSTOR */ "\033Page1GB" /* 1-GB large page support */ "\034RDTSCP" /* RDTSCP */ "\035" /* Undefined */ "\036LM" /* 64 bit long mode */ "\0373DNow!+" /* AMD 3DNow! Extensions */ "\0403DNow!" /* AMD 3DNow! 
*/ ); } if (amd_feature2 != 0) { printf("\n AMD Features2=0x%b", amd_feature2, "\020" "\001LAHF" /* LAHF/SAHF in long mode */ "\002CMP" /* CMP legacy */ "\003SVM" /* Secure Virtual Mode */ "\004ExtAPIC" /* Extended APIC register */ "\005CR8" /* CR8 in legacy mode */ "\006ABM" /* LZCNT instruction */ "\007SSE4A" /* SSE4A */ "\010MAS" /* Misaligned SSE mode */ "\011Prefetch" /* 3DNow! Prefetch/PrefetchW */ "\012OSVW" /* OS visible workaround */ "\013IBS" /* Instruction based sampling */ "\014XOP" /* XOP extended instructions */ "\015SKINIT" /* SKINIT/STGI */ "\016WDT" /* Watchdog timer */ "\017" "\020LWP" /* Lightweight Profiling */ "\021FMA4" /* 4-operand FMA instructions */ "\022TCE" /* Translation Cache Extension */ "\023" "\024NodeId" /* NodeId MSR support */ "\025" "\026TBM" /* Trailing Bit Manipulation */ "\027Topology" /* Topology Extensions */ "\030PCXC" /* Core perf count */ "\031PNXC" /* NB perf count */ "\032" "\033DBE" /* Data Breakpoint extension */ "\034PTSC" /* Performance TSC */ "\035PL2I" /* L2I perf count */ - "\036MWAITX" /* MONITORX/MWAITX instructions */ + "\036MWAITX" /* MONITORX/MWAITX instructions */ "\037" "\040" ); } if (cpu_stdext_feature != 0) { printf("\n Structured Extended Features=0x%b", cpu_stdext_feature, "\020" /* RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ "\001FSGSBASE" "\002TSCADJ" "\003SGX" /* Bit Manipulation Instructions */ "\004BMI1" /* Hardware Lock Elision */ "\005HLE" /* Advanced Vector Instructions 2 */ "\006AVX2" /* FDP_EXCPTN_ONLY */ "\007FDPEXC" /* Supervisor Mode Execution Prot. */ "\010SMEP" /* Bit Manipulation Instructions */ "\011BMI2" "\012ERMS" /* Invalidate Processor Context ID */ "\013INVPCID" /* Restricted Transactional Memory */ "\014RTM" "\015PQM" "\016NFPUSG" /* Intel Memory Protection Extensions */ "\017MPX" "\020PQE" /* AVX512 Foundation */ "\021AVX512F" "\022AVX512DQ" /* Enhanced NRBG */ "\023RDSEED" /* ADCX + ADOX */ "\024ADX" /* Supervisor Mode Access Prevention */ "\025SMAP" "\026AVX512IFMA" "\027PCOMMIT" "\030CLFLUSHOPT" "\031CLWB" "\032PROCTRACE" "\033AVX512PF" "\034AVX512ER" "\035AVX512CD" "\036SHA" "\037AVX512BW" ); } if (cpu_stdext_feature2 != 0) { printf("\n Structured Extended Features2=0x%b", cpu_stdext_feature2, "\020" "\001PREFETCHWT1" "\002AVX512VBMI" "\003UMIP" "\004PKU" "\005OSPKE" "\027RDPID" "\037SGXLC" ); } if ((cpu_feature2 & CPUID2_XSAVE) != 0) { cpuid_count(0xd, 0x1, regs); if (regs[0] != 0) { printf("\n XSAVE Features=0x%b", regs[0], "\020" "\001XSAVEOPT" "\002XSAVEC" "\003XINUSE" "\004XSAVES"); } } if (via_feature_rng != 0 || via_feature_xcrypt != 0) print_via_padlock_info(); if (cpu_feature2 & CPUID2_VMX) print_vmx_info(); if (amd_feature2 & AMDID2_SVM) print_svm_info(); if ((cpu_feature & CPUID_HTT) && cpu_vendor_id == CPU_VENDOR_AMD) cpu_feature &= ~CPUID_HTT; /* * If this CPU supports P-state invariant TSC then * mention the capability. */ if (tsc_is_invariant) { printf("\n TSC: P-state invariant"); if (tsc_perf_stat) printf(", performance statistics"); } } #ifdef __i386__ } else if (cpu_vendor_id == CPU_VENDOR_CYRIX) { printf(" DIR=0x%04x", cyrix_did); printf(" Stepping=%u", (cyrix_did & 0xf000) >> 12); printf(" Revision=%u", (cyrix_did & 0x0f00) >> 8); #ifndef CYRIX_CACHE_REALLY_WORKS if (cpu == CPU_M1 && (cyrix_did & 0xff00) < 0x1700) printf("\n CPU cache: write-through mode"); #endif #endif } /* Avoid ugly blank lines: only print newline when we have to. 
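cpu_stdext_feature and cpu_stdext_feature2 come from cpuid_count(7, 0) EBX/ECX, and the XSAVE feature word from cpuid_count(0xd, 1) EAX. The same subleaf probes from userland, assuming the __get_cpuid_count() helper available in newer GCC/Clang <cpuid.h>:

	/* Userland analogue of the cpuid_count() probes above. */
	#include <cpuid.h>
	#include <stdio.h>

	int
	main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* Leaf 7 subleaf 0: structured extended features. */
		if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
			printf("stdext  (EBX) = %#x\n", ebx);
			printf("stdext2 (ECX) = %#x\n", ecx);
		}
		/* Leaf 0xd subleaf 1: XSAVEOPT/XSAVEC/XSAVES bits. */
		if (__get_cpuid_count(0xd, 1, &eax, &ebx, &ecx, &edx))
			printf("XSAVE features (EAX) = %#x\n", eax);
		return (0);
	}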
*/ if (*cpu_vendor || cpu_id) printf("\n"); if (bootverbose) { if (cpu_vendor_id == CPU_VENDOR_AMD) print_AMD_info(); else if (cpu_vendor_id == CPU_VENDOR_INTEL) print_INTEL_info(); #ifdef __i386__ else if (cpu_vendor_id == CPU_VENDOR_TRANSMETA) print_transmeta_info(); #endif } print_hypervisor_info(); } #ifdef __i386__ void panicifcpuunsupported(void) { #if !defined(lint) #if !defined(I486_CPU) && !defined(I586_CPU) && !defined(I686_CPU) #error This kernel is not configured for one of the supported CPUs #endif #else /* lint */ #endif /* lint */ /* * Now that we have told the user what they have, * let them know if that machine type isn't configured. */ switch (cpu_class) { case CPUCLASS_286: /* a 286 should not make it this far, anyway */ case CPUCLASS_386: #if !defined(I486_CPU) case CPUCLASS_486: #endif #if !defined(I586_CPU) case CPUCLASS_586: #endif #if !defined(I686_CPU) case CPUCLASS_686: #endif panic("CPU class not configured"); default: break; } } static volatile u_int trap_by_rdmsr; /* * Special exception 6 handler. * The rdmsr instruction generates invalid opcodes fault on 486-class * Cyrix CPU. Stacked eip register points the rdmsr instruction in the * function identblue() when this handler is called. Stacked eip should * be advanced. */ inthand_t bluetrap6; #ifdef __GNUCLIKE_ASM __asm (" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(bluetrap6)) ",@function \n\ " __XSTRING(CNAME(bluetrap6)) ": \n\ ss \n\ movl $0xa8c1d," __XSTRING(CNAME(trap_by_rdmsr)) " \n\ addl $2, (%esp) /* rdmsr is a 2-byte instruction */ \n\ iret \n\ "); #endif /* * Special exception 13 handler. * Accessing non-existent MSR generates general protection fault. */ inthand_t bluetrap13; #ifdef __GNUCLIKE_ASM __asm (" \n\ .text \n\ .p2align 2,0x90 \n\ .type " __XSTRING(CNAME(bluetrap13)) ",@function \n\ " __XSTRING(CNAME(bluetrap13)) ": \n\ ss \n\ movl $0xa89c4," __XSTRING(CNAME(trap_by_rdmsr)) " \n\ popl %eax /* discard error code */ \n\ addl $2, (%esp) /* rdmsr is a 2-byte instruction */ \n\ iret \n\ "); #endif /* * Distinguish IBM Blue Lightning CPU from Cyrix CPUs that does not * support cpuid instruction. This function should be called after * loading interrupt descriptor table register. * * I don't like this method that handles fault, but I couldn't get * information for any other methods. Does blue giant know? */ static int identblue(void) { trap_by_rdmsr = 0; /* * Cyrix 486-class CPU does not support rdmsr instruction. * The rdmsr instruction generates invalid opcode fault, and exception * will be trapped by bluetrap6() on Cyrix 486-class CPU. The * bluetrap6() set the magic number to trap_by_rdmsr. */ setidt(IDT_UD, bluetrap6, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* * Certain BIOS disables cpuid instruction of Cyrix 6x86MX CPU. * In this case, rdmsr generates general protection fault, and * exception will be trapped by bluetrap13(). */ setidt(IDT_GP, bluetrap13, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); rdmsr(0x1002); /* Cyrix CPU generates fault. 
*/ if (trap_by_rdmsr == 0xa8c1d) return IDENTBLUE_CYRIX486; else if (trap_by_rdmsr == 0xa89c4) return IDENTBLUE_CYRIXM2; return IDENTBLUE_IBMCPU; } /* * identifycyrix() set lower 16 bits of cyrix_did as follows: * * F E D C B A 9 8 7 6 5 4 3 2 1 0 * +-------+-------+---------------+ * | SID | RID | Device ID | * | (DIR 1) | (DIR 0) | * +-------+-------+---------------+ */ static void identifycyrix(void) { register_t saveintr; int ccr2_test = 0, dir_test = 0; u_char ccr2, ccr3; saveintr = intr_disable(); ccr2 = read_cyrix_reg(CCR2); write_cyrix_reg(CCR2, ccr2 ^ CCR2_LOCK_NW); read_cyrix_reg(CCR2); if (read_cyrix_reg(CCR2) != ccr2) ccr2_test = 1; write_cyrix_reg(CCR2, ccr2); ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, ccr3 ^ CCR3_MAPEN3); read_cyrix_reg(CCR3); if (read_cyrix_reg(CCR3) != ccr3) dir_test = 1; /* CPU supports DIRs. */ write_cyrix_reg(CCR3, ccr3); if (dir_test) { /* Device ID registers are available. */ cyrix_did = read_cyrix_reg(DIR1) << 8; cyrix_did += read_cyrix_reg(DIR0); } else if (ccr2_test) cyrix_did = 0x0010; /* 486S A-step */ else cyrix_did = 0x00ff; /* Old 486SLC/DLC and TI486SXLC/SXL */ intr_restore(saveintr); } #endif /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg __unused, const struct cf_level *level, int status) { /* If there was an error during the transition, don't do anything. */ if (status != 0) return; /* Total setting for this level gives the new frequency in MHz. */ hw_clockrate = level->total_set.freq; } static void hook_tsc_freq(void *arg __unused) { if (tsc_is_invariant) return; tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY); } SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL); static const char *const vm_bnames[] = { "QEMU", /* QEMU */ "Plex86", /* Plex86 */ "Bochs", /* Bochs */ "Xen", /* Xen */ "BHYVE", /* bhyve */ "Seabios", /* KVM */ NULL }; static const char *const vm_pnames[] = { "VMware Virtual Platform", /* VMWare VM */ "Virtual Machine", /* Microsoft VirtualPC */ "VirtualBox", /* Sun xVM VirtualBox */ "Parallels Virtual Platform", /* Parallels VM */ "KVM", /* KVM */ NULL }; static void identify_hypervisor(void) { u_int regs[4]; char *p; int i; /* * [RFC] CPUID usage for interaction between Hypervisors and Linux. * http://lkml.org/lkml/2008/10/1/246 * * KB1009458: Mechanisms to determine if software is running in * a VMware virtual machine * http://kb.vmware.com/kb/1009458 */ if (cpu_feature2 & CPUID2_HV) { vm_guest = VM_GUEST_VM; do_cpuid(0x40000000, regs); if (regs[0] >= 0x40000000) { hv_high = regs[0]; ((u_int *)&hv_vendor)[0] = regs[1]; ((u_int *)&hv_vendor)[1] = regs[2]; ((u_int *)&hv_vendor)[2] = regs[3]; hv_vendor[12] = '\0'; if (strcmp(hv_vendor, "VMwareVMware") == 0) vm_guest = VM_GUEST_VMWARE; else if (strcmp(hv_vendor, "Microsoft Hv") == 0) vm_guest = VM_GUEST_HV; else if (strcmp(hv_vendor, "KVMKVMKVM") == 0) vm_guest = VM_GUEST_KVM; } return; } /* * Examine SMBIOS strings for older hypervisors. */ p = kern_getenv("smbios.system.serial"); if (p != NULL) { if (strncmp(p, "VMware-", 7) == 0 || strncmp(p, "VMW", 3) == 0) { vmware_hvcall(VMW_HVCMD_GETVERSION, regs); if (regs[1] == VMW_HVMAGIC) { vm_guest = VM_GUEST_VMWARE; freeenv(p); return; } } freeenv(p); } /* * XXX: Some of these entries may not be needed since they were * added to FreeBSD before the checks above. 
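identify_hypervisor() above reads the 12-byte vendor signature from CPUID leaf 0x40000000 (EBX, ECX, EDX) once the leaf-1 ECX "HV" bit indicates a hypervisor. A standalone sketch of the same probe:

	/*
	 * Standalone sketch of the leaf-0x40000000 probe done by
	 * identify_hypervisor(); only meaningful when the leaf-1
	 * ECX "HV" bit is set, otherwise the leaf may return garbage.
	 */
	#include <cpuid.h>
	#include <stdio.h>
	#include <string.h>

	int
	main(void)
	{
		unsigned int regs[4];
		char vendor[13];

		__cpuid(0x40000000, regs[0], regs[1], regs[2], regs[3]);
		memcpy(vendor + 0, &regs[1], 4);	/* EBX */
		memcpy(vendor + 4, &regs[2], 4);	/* ECX */
		memcpy(vendor + 8, &regs[3], 4);	/* EDX */
		vendor[12] = '\0';
		/* e.g. "VMwareVMware", "Microsoft Hv", "KVMKVMKVM" */
		printf("hypervisor signature: \"%s\"\n", vendor);
		return (0);
	}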
*/ p = kern_getenv("smbios.bios.vendor"); if (p != NULL) { for (i = 0; vm_bnames[i] != NULL; i++) if (strcmp(p, vm_bnames[i]) == 0) { vm_guest = VM_GUEST_VM; freeenv(p); return; } freeenv(p); } p = kern_getenv("smbios.system.product"); if (p != NULL) { for (i = 0; vm_pnames[i] != NULL; i++) if (strcmp(p, vm_pnames[i]) == 0) { vm_guest = VM_GUEST_VM; freeenv(p); return; } freeenv(p); } } bool fix_cpuid(void) { uint64_t msr; /* * Clear "Limit CPUID Maxval" bit and return true if the caller should * get the largest standard CPUID function number again if it is set * from BIOS. It is necessary for probing correct CPU topology later * and for the correct operation of the AVX-aware userspace. */ if (cpu_vendor_id == CPU_VENDOR_INTEL && ((CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x3) || (CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) >= 0xe))) { msr = rdmsr(MSR_IA32_MISC_ENABLE); if ((msr & IA32_MISC_EN_LIMCPUID) != 0) { msr &= ~IA32_MISC_EN_LIMCPUID; wrmsr(MSR_IA32_MISC_ENABLE, msr); return (true); } } /* * Re-enable AMD Topology Extension that could be disabled by BIOS * on some notebook processors. Without the extension it's really * hard to determine the correct CPU cache topology. * See BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 15h * Models 60h-6Fh Processors, Publication # 50742. */ if (cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x15) { msr = rdmsr(MSR_EXTFEATURES); if ((msr & ((uint64_t)1 << 54)) == 0) { msr |= (uint64_t)1 << 54; wrmsr(MSR_EXTFEATURES, msr); return (true); } } return (false); } /* * Final stage of CPU identification. */ #ifdef __i386__ void finishidentcpu(void) #else void identify_cpu(void) #endif { u_int regs[4], cpu_stdext_disable; #ifdef __i386__ u_char ccr3; #endif #ifdef __amd64__ do_cpuid(0, regs); cpu_high = regs[0]; ((u_int *)&cpu_vendor)[0] = regs[1]; ((u_int *)&cpu_vendor)[1] = regs[3]; ((u_int *)&cpu_vendor)[2] = regs[2]; cpu_vendor[12] = '\0'; do_cpuid(1, regs); cpu_id = regs[0]; cpu_procinfo = regs[1]; cpu_feature = regs[3]; cpu_feature2 = regs[2]; #endif identify_hypervisor(); cpu_vendor_id = find_cpu_vendor_id(); if (fix_cpuid()) { do_cpuid(0, regs); cpu_high = regs[0]; } if (cpu_high >= 5 && (cpu_feature2 & CPUID2_MON) != 0) { do_cpuid(5, regs); cpu_mon_mwait_flags = regs[2]; cpu_mon_min_size = regs[0] & CPUID5_MON_MIN_SIZE; cpu_mon_max_size = regs[1] & CPUID5_MON_MAX_SIZE; } if (cpu_high >= 7) { cpuid_count(7, 0, regs); cpu_stdext_feature = regs[1]; /* * Some hypervisors fail to filter out unsupported * extended features. For now, disable the * extensions, activation of which requires setting a * bit in CR4, and which VM monitors do not support. 
*/ if (cpu_feature2 & CPUID2_HV) { cpu_stdext_disable = CPUID_STDEXT_FSGSBASE | CPUID_STDEXT_SMEP; } else cpu_stdext_disable = 0; TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable); cpu_stdext_feature &= ~cpu_stdext_disable; cpu_stdext_feature2 = regs[2]; } #ifdef __i386__ if (cpu_high > 0 && (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_TRANSMETA || cpu_vendor_id == CPU_VENDOR_CENTAUR || cpu_vendor_id == CPU_VENDOR_NSC)) { do_cpuid(0x80000000, regs); if (regs[0] >= 0x80000000) cpu_exthigh = regs[0]; } #else if (cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD || cpu_vendor_id == CPU_VENDOR_CENTAUR) { do_cpuid(0x80000000, regs); cpu_exthigh = regs[0]; } #endif if (cpu_exthigh >= 0x80000001) { do_cpuid(0x80000001, regs); amd_feature = regs[3] & ~(cpu_feature & 0x0183f3ff); amd_feature2 = regs[2]; } if (cpu_exthigh >= 0x80000007) { do_cpuid(0x80000007, regs); amd_pminfo = regs[3]; } if (cpu_exthigh >= 0x80000008) { do_cpuid(0x80000008, regs); cpu_maxphyaddr = regs[0] & 0xff; cpu_procinfo2 = regs[2]; } else { cpu_maxphyaddr = (cpu_feature & CPUID_PAE) != 0 ? 36 : 32; } #ifdef __i386__ if (cpu_vendor_id == CPU_VENDOR_CYRIX) { if (cpu == CPU_486) { /* * These conditions are equivalent to: * - CPU does not support cpuid instruction. * - Cyrix/IBM CPU is detected. */ if (identblue() == IDENTBLUE_IBMCPU) { strcpy(cpu_vendor, "IBM"); cpu_vendor_id = CPU_VENDOR_IBM; cpu = CPU_BLUE; return; } } switch (cpu_id & 0xf00) { case 0x600: /* * Cyrix's datasheet does not describe DIRs. * Therefor, I assume it does not have them * and use the result of the cpuid instruction. * XXX they seem to have it for now at least. -Peter */ identifycyrix(); cpu = CPU_M2; break; default: identifycyrix(); /* * This routine contains a trick. * Don't check (cpu_id & 0x00f0) == 0x50 to detect M2, now. */ switch (cyrix_did & 0x00f0) { case 0x00: case 0xf0: cpu = CPU_486DLC; break; case 0x10: cpu = CPU_CY486DX; break; case 0x20: if ((cyrix_did & 0x000f) < 8) cpu = CPU_M1; else cpu = CPU_M1SC; break; case 0x30: cpu = CPU_M1; break; case 0x40: /* MediaGX CPU */ cpu = CPU_M1SC; break; default: /* M2 and later CPUs are treated as M2. */ cpu = CPU_M2; /* * enable cpuid instruction. */ ccr3 = read_cyrix_reg(CCR3); write_cyrix_reg(CCR3, CCR3_MAPEN0); write_cyrix_reg(CCR4, read_cyrix_reg(CCR4) | CCR4_CPUID); write_cyrix_reg(CCR3, ccr3); do_cpuid(0, regs); cpu_high = regs[0]; /* eax */ do_cpuid(1, regs); cpu_id = regs[0]; /* eax */ cpu_feature = regs[3]; /* edx */ break; } } } else if (cpu == CPU_486 && *cpu_vendor == '\0') { /* * There are BlueLightning CPUs that do not change * undefined flags by dividing 5 by 2. In this case, * the CPU identification routine in locore.s leaves * cpu_vendor null string and puts CPU_486 into the * cpu. 
*/ if (identblue() == IDENTBLUE_IBMCPU) { strcpy(cpu_vendor, "IBM"); cpu_vendor_id = CPU_VENDOR_IBM; cpu = CPU_BLUE; return; } } #endif } static u_int find_cpu_vendor_id(void) { int i; for (i = 0; i < nitems(cpu_vendors); i++) if (strcmp(cpu_vendor, cpu_vendors[i].vendor) == 0) return (cpu_vendors[i].vendor_id); return (0); } static void print_AMD_assoc(int i) { if (i == 255) printf(", fully associative\n"); else printf(", %d-way associative\n", i); } static void print_AMD_l2_assoc(int i) { switch (i & 0x0f) { case 0: printf(", disabled/not present\n"); break; case 1: printf(", direct mapped\n"); break; case 2: printf(", 2-way associative\n"); break; case 4: printf(", 4-way associative\n"); break; case 6: printf(", 8-way associative\n"); break; case 8: printf(", 16-way associative\n"); break; case 15: printf(", fully associative\n"); break; default: printf(", reserved configuration\n"); break; } } static void print_AMD_info(void) { #ifdef __i386__ uint64_t amd_whcr; #endif u_int regs[4]; if (cpu_exthigh >= 0x80000005) { do_cpuid(0x80000005, regs); printf("L1 2MB data TLB: %d entries", (regs[0] >> 16) & 0xff); print_AMD_assoc(regs[0] >> 24); printf("L1 2MB instruction TLB: %d entries", regs[0] & 0xff); print_AMD_assoc((regs[0] >> 8) & 0xff); printf("L1 4KB data TLB: %d entries", (regs[1] >> 16) & 0xff); print_AMD_assoc(regs[1] >> 24); printf("L1 4KB instruction TLB: %d entries", regs[1] & 0xff); print_AMD_assoc((regs[1] >> 8) & 0xff); printf("L1 data cache: %d kbytes", regs[2] >> 24); printf(", %d bytes/line", regs[2] & 0xff); printf(", %d lines/tag", (regs[2] >> 8) & 0xff); print_AMD_assoc((regs[2] >> 16) & 0xff); printf("L1 instruction cache: %d kbytes", regs[3] >> 24); printf(", %d bytes/line", regs[3] & 0xff); printf(", %d lines/tag", (regs[3] >> 8) & 0xff); print_AMD_assoc((regs[3] >> 16) & 0xff); } if (cpu_exthigh >= 0x80000006) { do_cpuid(0x80000006, regs); if ((regs[0] >> 16) != 0) { printf("L2 2MB data TLB: %d entries", (regs[0] >> 16) & 0xfff); print_AMD_l2_assoc(regs[0] >> 28); printf("L2 2MB instruction TLB: %d entries", regs[0] & 0xfff); print_AMD_l2_assoc((regs[0] >> 28) & 0xf); } else { printf("L2 2MB unified TLB: %d entries", regs[0] & 0xfff); print_AMD_l2_assoc((regs[0] >> 28) & 0xf); } if ((regs[1] >> 16) != 0) { printf("L2 4KB data TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc(regs[1] >> 28); printf("L2 4KB instruction TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc((regs[1] >> 28) & 0xf); } else { printf("L2 4KB unified TLB: %d entries", (regs[1] >> 16) & 0xfff); print_AMD_l2_assoc((regs[1] >> 28) & 0xf); } printf("L2 unified cache: %d kbytes", regs[2] >> 16); printf(", %d bytes/line", regs[2] & 0xff); printf(", %d lines/tag", (regs[2] >> 8) & 0x0f); print_AMD_l2_assoc((regs[2] >> 12) & 0x0f); } #ifdef __i386__ if (((cpu_id & 0xf00) == 0x500) && (((cpu_id & 0x0f0) > 0x80) || (((cpu_id & 0x0f0) == 0x80) && (cpu_id & 0x00f) > 0x07))) { /* K6-2(new core [Stepping 8-F]), K6-III or later */ amd_whcr = rdmsr(0xc0000082); if (!(amd_whcr & (0x3ff << 22))) { printf("Write Allocate Disable\n"); } else { printf("Write Allocate Enable Limit: %dM bytes\n", (u_int32_t)((amd_whcr & (0x3ff << 22)) >> 22) * 4); printf("Write Allocate 15-16M bytes: %s\n", (amd_whcr & (1 << 16)) ? 
"Enable" : "Disable"); } } else if (((cpu_id & 0xf00) == 0x500) && ((cpu_id & 0x0f0) > 0x50)) { /* K6, K6-2(old core) */ amd_whcr = rdmsr(0xc0000082); if (!(amd_whcr & (0x7f << 1))) { printf("Write Allocate Disable\n"); } else { printf("Write Allocate Enable Limit: %dM bytes\n", (u_int32_t)((amd_whcr & (0x7f << 1)) >> 1) * 4); printf("Write Allocate 15-16M bytes: %s\n", (amd_whcr & 0x0001) ? "Enable" : "Disable"); printf("Hardware Write Allocate Control: %s\n", (amd_whcr & 0x0100) ? "Enable" : "Disable"); } } #endif /* * Opteron Rev E shows a bug as in very rare occasions a read memory * barrier is not performed as expected if it is followed by a * non-atomic read-modify-write instruction. * As long as that bug pops up very rarely (intensive machine usage * on other operating systems generally generates one unexplainable * crash any 2 months) and as long as a model specific fix would be * impractical at this stage, print out a warning string if the broken * model and family are identified. */ if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x20 && CPUID_TO_MODEL(cpu_id) <= 0x3f) printf("WARNING: This architecture revision has known SMP " "hardware bugs which may cause random instability\n"); } static void print_INTEL_info(void) { u_int regs[4]; u_int rounds, regnum; u_int nwaycode, nway; if (cpu_high >= 2) { rounds = 0; do { do_cpuid(0x2, regs); if (rounds == 0 && (rounds = (regs[0] & 0xff)) == 0) break; /* we have a buggy CPU */ for (regnum = 0; regnum <= 3; ++regnum) { if (regs[regnum] & (1<<31)) continue; if (regnum != 0) print_INTEL_TLB(regs[regnum] & 0xff); print_INTEL_TLB((regs[regnum] >> 8) & 0xff); print_INTEL_TLB((regs[regnum] >> 16) & 0xff); print_INTEL_TLB((regs[regnum] >> 24) & 0xff); } } while (--rounds > 0); } if (cpu_exthigh >= 0x80000006) { do_cpuid(0x80000006, regs); nwaycode = (regs[2] >> 12) & 0x0f; if (nwaycode >= 0x02 && nwaycode <= 0x08) nway = 1 << (nwaycode / 2); else nway = 0; printf("L2 cache: %u kbytes, %u-way associative, %u bytes/line\n", (regs[2] >> 16) & 0xffff, nway, regs[2] & 0xff); } } static void print_INTEL_TLB(u_int data) { switch (data) { case 0x0: case 0x40: default: break; case 0x1: printf("Instruction TLB: 4 KB pages, 4-way set associative, 32 entries\n"); break; case 0x2: printf("Instruction TLB: 4 MB pages, fully associative, 2 entries\n"); break; case 0x3: printf("Data TLB: 4 KB pages, 4-way set associative, 64 entries\n"); break; case 0x4: printf("Data TLB: 4 MB Pages, 4-way set associative, 8 entries\n"); break; case 0x6: printf("1st-level instruction cache: 8 KB, 4-way set associative, 32 byte line size\n"); break; case 0x8: printf("1st-level instruction cache: 16 KB, 4-way set associative, 32 byte line size\n"); break; case 0x9: printf("1st-level instruction cache: 32 KB, 4-way set associative, 64 byte line size\n"); break; case 0xa: printf("1st-level data cache: 8 KB, 2-way set associative, 32 byte line size\n"); break; case 0xb: printf("Instruction TLB: 4 MByte pages, 4-way set associative, 4 entries\n"); break; case 0xc: printf("1st-level data cache: 16 KB, 4-way set associative, 32 byte line size\n"); break; case 0xd: printf("1st-level data cache: 16 KBytes, 4-way set associative, 64 byte line size"); break; case 0xe: printf("1st-level data cache: 24 KBytes, 6-way set associative, 64 byte line size\n"); break; case 0x1d: printf("2nd-level cache: 128 KBytes, 2-way set associative, 64 byte line size\n"); break; case 0x21: printf("2nd-level cache: 256 KBytes, 8-way set associative, 64 byte line size\n"); break; case 0x22: 
printf("3rd-level cache: 512 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x23: printf("3rd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x24: printf("2nd-level cache: 1 MBytes, 16-way set associative, 64 byte line size\n"); break; case 0x25: printf("3rd-level cache: 2 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x29: printf("3rd-level cache: 4 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x2c: printf("1st-level data cache: 32 KB, 8-way set associative, 64 byte line size\n"); break; case 0x30: printf("1st-level instruction cache: 32 KB, 8-way set associative, 64 byte line size\n"); break; case 0x39: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 128 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x3b: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 128 KB, 2-way set associative, sectored cache, 64 byte line size\n"); break; case 0x3c: /* De-listed in SDM rev. 54 */ printf("2nd-level cache: 256 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x41: printf("2nd-level cache: 128 KB, 4-way set associative, 32 byte line size\n"); break; case 0x42: printf("2nd-level cache: 256 KB, 4-way set associative, 32 byte line size\n"); break; case 0x43: printf("2nd-level cache: 512 KB, 4-way set associative, 32 byte line size\n"); break; case 0x44: printf("2nd-level cache: 1 MB, 4-way set associative, 32 byte line size\n"); break; case 0x45: printf("2nd-level cache: 2 MB, 4-way set associative, 32 byte line size\n"); break; case 0x46: printf("3rd-level cache: 4 MB, 4-way set associative, 64 byte line size\n"); break; case 0x47: printf("3rd-level cache: 8 MB, 8-way set associative, 64 byte line size\n"); break; case 0x48: printf("2nd-level cache: 3MByte, 12-way set associative, 64 byte line size\n"); break; case 0x49: if (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) == 0x6) printf("3rd-level cache: 4MB, 16-way set associative, 64-byte line size\n"); else printf("2nd-level cache: 4 MByte, 16-way set associative, 64 byte line size"); break; case 0x4a: printf("3rd-level cache: 6MByte, 12-way set associative, 64 byte line size\n"); break; case 0x4b: printf("3rd-level cache: 8MByte, 16-way set associative, 64 byte line size\n"); break; case 0x4c: printf("3rd-level cache: 12MByte, 12-way set associative, 64 byte line size\n"); break; case 0x4d: printf("3rd-level cache: 16MByte, 16-way set associative, 64 byte line size\n"); break; case 0x4e: printf("2nd-level cache: 6MByte, 24-way set associative, 64 byte line size\n"); break; case 0x4f: printf("Instruction TLB: 4 KByte pages, 32 entries\n"); break; case 0x50: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 64 entries\n"); break; case 0x51: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 128 entries\n"); break; case 0x52: printf("Instruction TLB: 4 KB, 2 MB or 4 MB pages, fully associative, 256 entries\n"); break; case 0x55: printf("Instruction TLB: 2-MByte or 4-MByte pages, fully associative, 7 entries\n"); break; case 0x56: printf("Data TLB0: 4 MByte pages, 4-way set associative, 16 entries\n"); break; case 0x57: printf("Data TLB0: 4 KByte pages, 4-way associative, 16 entries\n"); break; case 0x59: printf("Data TLB0: 4 KByte pages, fully associative, 16 entries\n"); break; case 0x5a: printf("Data TLB0: 2-MByte or 4 MByte pages, 4-way set associative, 32 entries\n"); break; case 0x5b: 
printf("Data TLB: 4 KB or 4 MB pages, fully associative, 64 entries\n"); break; case 0x5c: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 128 entries\n"); break; case 0x5d: printf("Data TLB: 4 KB or 4 MB pages, fully associative, 256 entries\n"); break; case 0x60: printf("1st-level data cache: 16 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x61: printf("Instruction TLB: 4 KByte pages, fully associative, 48 entries\n"); break; case 0x63: printf("Data TLB: 2 MByte or 4 MByte pages, 4-way set associative, 32 entries and a separate array with 1 GByte pages, 4-way set associative, 4 entries\n"); break; case 0x64: printf("Data TLB: 4 KBytes pages, 4-way set associative, 512 entries\n"); break; case 0x66: printf("1st-level data cache: 8 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x67: printf("1st-level data cache: 16 KB, 4-way set associative, sectored cache, 64 byte line size\n"); break; case 0x68: printf("1st-level data cache: 32 KB, 4 way set associative, sectored cache, 64 byte line size\n"); break; case 0x6a: printf("uTLB: 4KByte pages, 8-way set associative, 64 entries\n"); break; case 0x6b: printf("DTLB: 4KByte pages, 8-way set associative, 256 entries\n"); break; case 0x6c: printf("DTLB: 2M/4M pages, 8-way set associative, 128 entries\n"); break; case 0x6d: printf("DTLB: 1 GByte pages, fully associative, 16 entries\n"); break; case 0x70: printf("Trace cache: 12K-uops, 8-way set associative\n"); break; case 0x71: printf("Trace cache: 16K-uops, 8-way set associative\n"); break; case 0x72: printf("Trace cache: 32K-uops, 8-way set associative\n"); break; case 0x76: printf("Instruction TLB: 2M/4M pages, fully associative, 8 entries\n"); break; case 0x78: printf("2nd-level cache: 1 MB, 4-way set associative, 64-byte line size\n"); break; case 0x79: printf("2nd-level cache: 128 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7a: printf("2nd-level cache: 256 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7b: printf("2nd-level cache: 512 KB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7c: printf("2nd-level cache: 1 MB, 8-way set associative, sectored cache, 64 byte line size\n"); break; case 0x7d: printf("2nd-level cache: 2-MB, 8-way set associative, 64-byte line size\n"); break; case 0x7f: printf("2nd-level cache: 512-KB, 2-way set associative, 64-byte line size\n"); break; case 0x80: printf("2nd-level cache: 512 KByte, 8-way set associative, 64-byte line size\n"); break; case 0x82: printf("2nd-level cache: 256 KB, 8-way set associative, 32 byte line size\n"); break; case 0x83: printf("2nd-level cache: 512 KB, 8-way set associative, 32 byte line size\n"); break; case 0x84: printf("2nd-level cache: 1 MB, 8-way set associative, 32 byte line size\n"); break; case 0x85: printf("2nd-level cache: 2 MB, 8-way set associative, 32 byte line size\n"); break; case 0x86: printf("2nd-level cache: 512 KB, 4-way set associative, 64 byte line size\n"); break; case 0x87: printf("2nd-level cache: 1 MB, 8-way set associative, 64 byte line size\n"); break; case 0xa0: printf("DTLB: 4k pages, fully associative, 32 entries\n"); break; case 0xb0: printf("Instruction TLB: 4 KB Pages, 4-way set associative, 128 entries\n"); break; case 0xb1: printf("Instruction TLB: 2M pages, 4-way, 8 entries or 4M pages, 4-way, 4 entries\n"); break; case 0xb2: printf("Instruction TLB: 4KByte pages, 4-way set associative, 64 entries\n"); break; case 0xb3: 
printf("Data TLB: 4 KB Pages, 4-way set associative, 128 entries\n"); break; case 0xb4: printf("Data TLB1: 4 KByte pages, 4-way associative, 256 entries\n"); break; case 0xb5: printf("Instruction TLB: 4KByte pages, 8-way set associative, 64 entries\n"); break; case 0xb6: printf("Instruction TLB: 4KByte pages, 8-way set associative, 128 entries\n"); break; case 0xba: printf("Data TLB1: 4 KByte pages, 4-way associative, 64 entries\n"); break; case 0xc0: printf("Data TLB: 4 KByte and 4 MByte pages, 4-way associative, 8 entries\n"); break; case 0xc1: printf("Shared 2nd-Level TLB: 4 KByte/2MByte pages, 8-way associative, 1024 entries\n"); break; case 0xc2: printf("DTLB: 4 KByte/2 MByte pages, 4-way associative, 16 entries\n"); break; case 0xc3: printf("Shared 2nd-Level TLB: 4 KByte /2 MByte pages, 6-way associative, 1536 entries. Also 1GBbyte pages, 4-way, 16 entries\n"); break; case 0xc4: printf("DTLB: 2M/4M Byte pages, 4-way associative, 32 entries\n"); break; case 0xca: printf("Shared 2nd-Level TLB: 4 KByte pages, 4-way associative, 512 entries\n"); break; case 0xd0: printf("3rd-level cache: 512 KByte, 4-way set associative, 64 byte line size\n"); break; case 0xd1: printf("3rd-level cache: 1 MByte, 4-way set associative, 64 byte line size\n"); break; case 0xd2: printf("3rd-level cache: 2 MByte, 4-way set associative, 64 byte line size\n"); break; case 0xd6: printf("3rd-level cache: 1 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xd7: printf("3rd-level cache: 2 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xd8: printf("3rd-level cache: 4 MByte, 8-way set associative, 64 byte line size\n"); break; case 0xdc: printf("3rd-level cache: 1.5 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xdd: printf("3rd-level cache: 3 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xde: printf("3rd-level cache: 6 MByte, 12-way set associative, 64 byte line size\n"); break; case 0xe2: printf("3rd-level cache: 2 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xe3: printf("3rd-level cache: 4 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xe4: printf("3rd-level cache: 8 MByte, 16-way set associative, 64 byte line size\n"); break; case 0xea: printf("3rd-level cache: 12MByte, 24-way set associative, 64 byte line size\n"); break; case 0xeb: printf("3rd-level cache: 18MByte, 24-way set associative, 64 byte line size\n"); break; case 0xec: printf("3rd-level cache: 24MByte, 24-way set associative, 64 byte line size\n"); break; case 0xf0: printf("64-Byte prefetching\n"); break; case 0xf1: printf("128-Byte prefetching\n"); break; } } static void print_svm_info(void) { u_int features, regs[4]; uint64_t msr; int comma; printf("\n SVM: "); do_cpuid(0x8000000A, regs); features = regs[3]; msr = rdmsr(MSR_VM_CR); if ((msr & VM_CR_SVMDIS) == VM_CR_SVMDIS) printf("(disabled in BIOS) "); if (!bootverbose) { comma = 0; if (features & (1 << 0)) { printf("%sNP", comma ? "," : ""); comma = 1; } if (features & (1 << 3)) { printf("%sNRIP", comma ? "," : ""); comma = 1; } if (features & (1 << 5)) { printf("%sVClean", comma ? "," : ""); comma = 1; } if (features & (1 << 6)) { printf("%sAFlush", comma ? "," : ""); comma = 1; } if (features & (1 << 7)) { printf("%sDAssist", comma ? "," : ""); comma = 1; } printf("%sNAsids=%d", comma ? 
"," : "", regs[1]); return; } printf("Features=0x%b", features, "\020" "\001NP" /* Nested paging */ "\002LbrVirt" /* LBR virtualization */ "\003SVML" /* SVM lock */ "\004NRIPS" /* NRIP save */ "\005TscRateMsr" /* MSR based TSC rate control */ "\006VmcbClean" /* VMCB clean bits */ "\007FlushByAsid" /* Flush by ASID */ "\010DecodeAssist" /* Decode assist */ "\011" "\012" "\013PauseFilter" /* PAUSE intercept filter */ "\014" "\015PauseFilterThreshold" /* PAUSE filter threshold */ "\016AVIC" /* virtual interrupt controller */ ); printf("\nRevision=%d, ASIDs=%d", regs[0] & 0xff, regs[1]); } #ifdef __i386__ static void print_transmeta_info(void) { u_int regs[4], nreg = 0; do_cpuid(0x80860000, regs); nreg = regs[0]; if (nreg >= 0x80860001) { do_cpuid(0x80860001, regs); printf(" Processor revision %u.%u.%u.%u\n", (regs[1] >> 24) & 0xff, (regs[1] >> 16) & 0xff, (regs[1] >> 8) & 0xff, regs[1] & 0xff); } if (nreg >= 0x80860002) { do_cpuid(0x80860002, regs); printf(" Code Morphing Software revision %u.%u.%u-%u-%u\n", (regs[1] >> 24) & 0xff, (regs[1] >> 16) & 0xff, (regs[1] >> 8) & 0xff, regs[1] & 0xff, regs[2]); } if (nreg >= 0x80860006) { char info[65]; do_cpuid(0x80860003, (u_int*) &info[0]); do_cpuid(0x80860004, (u_int*) &info[16]); do_cpuid(0x80860005, (u_int*) &info[32]); do_cpuid(0x80860006, (u_int*) &info[48]); info[64] = 0; printf(" %s\n", info); } } #endif static void print_via_padlock_info(void) { u_int regs[4]; do_cpuid(0xc0000001, regs); printf("\n VIA Padlock Features=0x%b", regs[3], "\020" "\003RNG" /* RNG */ "\007AES" /* ACE */ "\011AES-CTR" /* ACE2 */ "\013SHA1,SHA256" /* PHE */ "\015RSA" /* PMM */ ); } static uint32_t vmx_settable(uint64_t basic, int msr, int true_msr) { uint64_t val; if (basic & (1ULL << 55)) val = rdmsr(true_msr); else val = rdmsr(msr); /* Just report the controls that can be set to 1. */ return (val >> 32); } static void print_vmx_info(void) { uint64_t basic, msr; uint32_t entry, exit, mask, pin, proc, proc2; int comma; printf("\n VT-x: "); msr = rdmsr(MSR_IA32_FEATURE_CONTROL); if (!(msr & IA32_FEATURE_CONTROL_VMX_EN)) printf("(disabled in BIOS) "); basic = rdmsr(MSR_VMX_BASIC); pin = vmx_settable(basic, MSR_VMX_PINBASED_CTLS, MSR_VMX_TRUE_PINBASED_CTLS); proc = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS); if (proc & PROCBASED_SECONDARY_CONTROLS) proc2 = vmx_settable(basic, MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2); else proc2 = 0; exit = vmx_settable(basic, MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS); entry = vmx_settable(basic, MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS); if (!bootverbose) { comma = 0; if (exit & VM_EXIT_SAVE_PAT && exit & VM_EXIT_LOAD_PAT && entry & VM_ENTRY_LOAD_PAT) { printf("%sPAT", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_HLT_EXITING) { printf("%sHLT", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_MTF) { printf("%sMTF", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_PAUSE_EXITING) { printf("%sPAUSE", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_ENABLE_EPT) { printf("%sEPT", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_UNRESTRICTED_GUEST) { printf("%sUG", comma ? "," : ""); comma = 1; } if (proc2 & PROCBASED2_ENABLE_VPID) { printf("%sVPID", comma ? "," : ""); comma = 1; } if (proc & PROCBASED_USE_TPR_SHADOW && proc2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES && proc2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE && proc2 & PROCBASED2_APIC_REGISTER_VIRTUALIZATION && proc2 & PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY) { printf("%sVID", comma ? 
"," : ""); comma = 1; if (pin & PINBASED_POSTED_INTERRUPT) printf(",PostIntr"); } return; } mask = basic >> 32; printf("Basic Features=0x%b", mask, "\020" "\02132PA" /* 32-bit physical addresses */ "\022SMM" /* SMM dual-monitor */ "\027INS/OUTS" /* VM-exit info for INS and OUTS */ "\030TRUE" /* TRUE_CTLS MSRs */ ); printf("\n Pin-Based Controls=0x%b", pin, "\020" "\001ExtINT" /* External-interrupt exiting */ "\004NMI" /* NMI exiting */ "\006VNMI" /* Virtual NMIs */ "\007PreTmr" /* Activate VMX-preemption timer */ "\010PostIntr" /* Process posted interrupts */ ); printf("\n Primary Processor Controls=0x%b", proc, "\020" "\003INTWIN" /* Interrupt-window exiting */ "\004TSCOff" /* Use TSC offsetting */ "\010HLT" /* HLT exiting */ "\012INVLPG" /* INVLPG exiting */ "\013MWAIT" /* MWAIT exiting */ "\014RDPMC" /* RDPMC exiting */ "\015RDTSC" /* RDTSC exiting */ "\020CR3-LD" /* CR3-load exiting */ "\021CR3-ST" /* CR3-store exiting */ "\024CR8-LD" /* CR8-load exiting */ "\025CR8-ST" /* CR8-store exiting */ "\026TPR" /* Use TPR shadow */ "\027NMIWIN" /* NMI-window exiting */ "\030MOV-DR" /* MOV-DR exiting */ "\031IO" /* Unconditional I/O exiting */ "\032IOmap" /* Use I/O bitmaps */ "\034MTF" /* Monitor trap flag */ "\035MSRmap" /* Use MSR bitmaps */ "\036MONITOR" /* MONITOR exiting */ "\037PAUSE" /* PAUSE exiting */ ); if (proc & PROCBASED_SECONDARY_CONTROLS) printf("\n Secondary Processor Controls=0x%b", proc2, "\020" "\001APIC" /* Virtualize APIC accesses */ "\002EPT" /* Enable EPT */ "\003DT" /* Descriptor-table exiting */ "\004RDTSCP" /* Enable RDTSCP */ "\005x2APIC" /* Virtualize x2APIC mode */ "\006VPID" /* Enable VPID */ "\007WBINVD" /* WBINVD exiting */ "\010UG" /* Unrestricted guest */ "\011APIC-reg" /* APIC-register virtualization */ "\012VID" /* Virtual-interrupt delivery */ "\013PAUSE-loop" /* PAUSE-loop exiting */ "\014RDRAND" /* RDRAND exiting */ "\015INVPCID" /* Enable INVPCID */ "\016VMFUNC" /* Enable VM functions */ "\017VMCS" /* VMCS shadowing */ "\020EPT#VE" /* EPT-violation #VE */ "\021XSAVES" /* Enable XSAVES/XRSTORS */ ); printf("\n Exit Controls=0x%b", mask, "\020" "\003DR" /* Save debug controls */ /* Ignore Host address-space size */ "\015PERF" /* Load MSR_PERF_GLOBAL_CTRL */ "\020AckInt" /* Acknowledge interrupt on exit */ "\023PAT-SV" /* Save MSR_PAT */ "\024PAT-LD" /* Load MSR_PAT */ "\025EFER-SV" /* Save MSR_EFER */ "\026EFER-LD" /* Load MSR_EFER */ "\027PTMR-SV" /* Save VMX-preemption timer value */ ); printf("\n Entry Controls=0x%b", mask, "\020" "\003DR" /* Save debug controls */ /* Ignore IA-32e mode guest */ /* Ignore Entry to SMM */ /* Ignore Deactivate dual-monitor treatment */ "\016PERF" /* Load MSR_PERF_GLOBAL_CTRL */ "\017PAT" /* Load MSR_PAT */ "\020EFER" /* Load MSR_EFER */ ); if (proc & PROCBASED_SECONDARY_CONTROLS && (proc2 & (PROCBASED2_ENABLE_EPT | PROCBASED2_ENABLE_VPID)) != 0) { msr = rdmsr(MSR_VMX_EPT_VPID_CAP); mask = msr; printf("\n EPT Features=0x%b", mask, "\020" "\001XO" /* Execute-only translations */ "\007PW4" /* Page-walk length of 4 */ "\011UC" /* EPT paging-structure mem can be UC */ "\017WB" /* EPT paging-structure mem can be WB */ "\0212M" /* EPT PDE can map a 2-Mbyte page */ "\0221G" /* EPT PDPTE can map a 1-Gbyte page */ "\025INVEPT" /* INVEPT is supported */ "\026AD" /* Accessed and dirty flags for EPT */ "\032single" /* INVEPT single-context type */ "\033all" /* INVEPT all-context type */ ); mask = msr >> 32; printf("\n VPID Features=0x%b", mask, "\020" "\001INVVPID" /* INVVPID is supported */ "\011individual" /* INVVPID 
Index: projects/clang500-import/tools/build/options/WITHOUT_BLACKLIST
===================================================================
--- projects/clang500-import/tools/build/options/WITHOUT_BLACKLIST	(revision 319250)
+++ projects/clang500-import/tools/build/options/WITHOUT_BLACKLIST	(revision 319251)
@@ -1,2 +1,5 @@
 .\" $FreeBSD$
-Set this if you do not want to build blacklistd/blacklistctl.
+Set this if you do not want to build
+.Xr blacklistd 8
+and
+.Xr blacklistctl 8 .
Index: projects/clang500-import/tools/build/options/WITHOUT_BLACKLIST_SUPPORT
===================================================================
--- projects/clang500-import/tools/build/options/WITHOUT_BLACKLIST_SUPPORT	(revision 319250)
+++ projects/clang500-import/tools/build/options/WITHOUT_BLACKLIST_SUPPORT	(revision 319251)
@@ -1,8 +1,10 @@
 .\" $FreeBSD$
-Set to build some programs without blacklistd support, like
+Set to build some programs without
+.Xr libblacklist 3
+support, like
 .Xr fingerd 8 ,
 .Xr ftpd 8 ,
 .Xr rlogind 8 ,
 .Xr rshd 8 ,
 and
 .Xr sshd 8 .
Index: projects/clang500-import
===================================================================
--- projects/clang500-import	(revision 319250)
+++ projects/clang500-import	(revision 319251)

Property changes on: projects/clang500-import
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r319165-319250